Example #1
0
def updateQ(c_state, action, alpha, gamma):
    """Apply one tabular Q-learning update for the pair (c_state, action).

    The successor board is obtained from ``pt.answer``, normalised so that
    every cell value strictly between 0 and 8 becomes 1, and paired with
    ``pt.getNextPiece()`` to form the successor state. The new estimate

        Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a))

    is then written to ``lists.Qtable``, which is grown on demand.

    Args:
        c_state: Sequence whose item 0 is the board and item 1 is the piece;
            items 1 and 2 of the piece are forwarded to ``pt.answer``.
        action: Member of ``lists.action_list``; its items 0 and 1 are
            forwarded to ``pt.answer``.
        alpha: Learning-rate factor of the update.
        gamma: Discount factor applied to the best successor value.

    Returns:
        None. The result is stored in ``lists.Qtable[state][action]``.
    """
    state_idx = add_state(c_state)
    action_idx = lists.action_list.index(action)

    current_piece = c_state[1]
    next_board, _ = pt.answer(
        c_state[0], current_piece[1], current_piece[2], action[0], action[1]
    )

    # Collapse every value strictly between 0 and 8 to 1, in place.
    for board_row in next_board:
        for col, cell in enumerate(board_row):
            if 0 < cell < 8:
                board_row[col] = 1

    next_state = [next_board, pt.getNextPiece()]
    next_idx = add_state(next_state)
    # A falsy result triggers one more registration attempt. Note that this
    # also fires when the returned value is 0.
    if not next_idx:
        next_idx = add_state(next_state)

    old_q = get_value(state_idx, action_idx)
    step_reward, _ = get_reward(c_state, action)
    best_next_q = get_max_Qvalue(next_idx)

    new_q = old_q + alpha * (step_reward + gamma * best_next_q - old_q)

    # Grow the table until [state_idx][action_idx] is addressable: missing
    # state rows start empty, missing action slots start at 0.
    table = lists.Qtable
    while len(table) <= state_idx:
        table.append([])
    q_row = table[state_idx]
    while len(q_row) <= action_idx:
        q_row.append(0)
    q_row[action_idx] = new_q
Example #2
0
def get_reward(c_state, action):
    """Return the reward for taking *action* in *c_state*, plus a height value.

    The reward is the sum of two parts:

    * a board bonus of ``100 / (max(board) - min(board))``, computed on the
      current board (not on the board that results from the action);
    * the line reward reported by ``pt.answer`` for the action.

    An earlier per-column scheme (reward for filling the lowest free cells,
    penalty for creating gaps) used to live here as commented-out code; it
    was already disabled and has been removed.

    Args:
        c_state: Pair ``(board, piece)``. ``board`` is the current board
            state; ``piece[1]`` and ``piece[2]`` are forwarded to
            ``pt.answer``.
        action: Sequence whose items 0 and 1 are forwarded to ``pt.answer``.

    Returns:
        Tuple ``(reward, max_h)``. ``max_h`` is always 7: the column scan
        that used to lower it is no longer part of this function.
    """
    c_board_state, piece = c_state
    _, line_reward = pt.answer(
        c_board_state, piece[1], piece[2], action[0], action[1]
    )

    # Number of cleared lines, per the original comment. The live formula
    # does not use it; the call is kept because line() is defined elsewhere
    # and its contract is not visible from here.
    erased_lines = line(line_reward)

    max_h = 7

    # NOTE(review): if the board marks walls as 8 and empty cells as 0, this
    # spread is identical for nearly every position and the bonus is
    # effectively constant -- confirm that this is the intended signal.
    spread = numpy.max(c_board_state) - numpy.min(c_board_state)
    # A uniform board has zero spread; dividing by it would produce inf (or
    # a warning and a meaningless value). Clamp the divisor to 1 so the
    # bonus tops out at a finite 100.
    block_reward = 100 / (spread if spread else 1)

    return block_reward + line_reward, max_h
Example #3
0
       [8,0,0,0,0,0,0,0,0,0,0,8],
       [8,0,0,0,0,0,0,0,0,0,0,8],
       [8,0,0,0,0,0,0,0,0,0,0,8],
       [8,0,0,0,0,0,0,0,0,0,0,8],
       [8,0,0,0,0,0,0,0,0,0,0,8],
       [8,0,0,0,0,0,0,0,0,0,0,8],
       [8,8,8,8,8,8,8,8,8,8,8,8],
       [8,8,8,8,8,8,8,8,8,8,8,8],
       [8,8,8,8,8,8,8,8,8,8,8,8],
       [8,8,8,8,8,8,8,8,8,8,8,8],
       [8,8,8,8,8,8,8,8,8,8,8,8],
       [8,8,8,8,8,8,8,8,8,8,8,8]]

# Driver script: query the game object `pt` once, print the result of a
# trial placement, rotate the piece twice, then run the game loop.

# Board as returned by pt.getBoard(); `b` is not referenced again in the
# visible part of this script.
b = pt.getBoard()

# Result of pt.answer for the current board and items 1 and 2 of the current
# piece, with trailing arguments 0 and 2.
# NOTE(review): the trailing pair presumably corresponds to an action's two
# components -- confirm against pt.answer's definition.
ans = pt.answer(pt.getBoard(),pt.getPiece()[1], pt.getPiece()[2], 0, 2)
# Alternative call kept for reference: same arguments, but with a literal
# list in place of the live board.
#ans = pt.answer([2,3,4,5,4,3],pt.getPiece()[1], pt.getPiece()[2], 0, 2)

pprint(ans)
# Two consecutive rotations with the 'ccw' argument.
pt.rotate('ccw')
pt.rotate('ccw')

# Main loop: runs for as long as pt.alive() is truthy.
while(pt.alive()):
    #########################
    # Per-iteration analysis and control logic goes here.

    pt.drop()
    # Advance to the next iteration.
    pt.loop()
    # `step` is initialised outside this excerpt; it counts loop iterations.
    step += 1