-
Notifications
You must be signed in to change notification settings - Fork 0
/
evaluator.py
130 lines (110 loc) · 3.93 KB
/
evaluator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
from game import TetrisGame
from random import randint
#from itertools import accumulate
def accumulate(lis):
    """Return the running totals of ``lis`` as a list.

    Local stand-in for ``itertools.accumulate`` (whose import is commented
    out in this file); unlike the itertools version it returns a concrete
    list rather than an iterator.

    Args:
        lis: Iterable of values that support ``+`` with ``0``.

    Returns:
        A list where element ``i`` is ``lis[0] + ... + lis[i]``; an empty
        list when ``lis`` is empty.
    """
    out = []
    acc = 0
    for x in lis:
        acc = acc + x
        out.append(acc)
    return out
def generate_piece():
    """Return a randomly chosen piece identifier, either ``"S"`` or ``"Z"``.

    The two pieces are drawn with equal probability using ``randint``.
    """
    pieces = ["S", "Z"]
    return pieces[randint(0, 1)]
def states_gen(agent, noOfItems, rows, cols):
    """Collect distinct board states reached while ``agent`` plays S/Z games.

    Every game starts from the all-zero board. After each piece that is
    placed with a non-negative reward, the resulting board is recorded if it
    has not been seen before. Games are repeated until at least
    ``noOfItems`` new states have been recorded.

    Args:
        agent: Object providing ``reset()`` and
            ``getLocation(piece, gameBoard)``; the latter must return an
            indexable pair that is forwarded to ``TetrisGame.addPiece``.
        noOfItems: Minimum number of new (non-initial) states to collect.
        rows: Number of board rows.
        cols: Number of board columns.

    Returns:
        A list of board states whose first element is the all-zero board.
        The list can hold more than ``noOfItems + 1`` entries because the
        game in progress is always played to its end.

    Note:
        The loop only stops once enough new states were found, so it does
        not terminate if fewer than ``noOfItems`` distinct states are
        reachable under ``agent``.
    """
    emptyBoard = [[0 for col in range(cols)] for row in range(rows)]
    states = [emptyBoard]
    # String form of each recorded board, used for O(1) duplicate checks.
    statesChk = {str(emptyBoard)}
    gameBoard = TetrisGame(rows, cols)
    count = 0
    while count < noOfItems:
        agent.reset()
        # NOTE(review): assumes setNewState copies its argument so that
        # playing the game does not mutate states[0] -- confirm in game.py.
        gameBoard.setNewState(states[0])
        while True:
            piece = generate_piece()
            loc = agent.getLocation(piece, gameBoard)
            reward = gameBoard.addPiece(piece, loc[0], loc[1])
            # A negative reward ends the current game.
            if reward < 0:
                break
            strState = str(gameBoard.getState())
            if strState not in statesChk:
                states.append(gameBoard.getStateCopy())
                statesChk.add(strState)
                count = count + 1
    return states
def monteCarloEval(agent, noOfItems, dataPoints, rows, cols, init_states):
    """Estimate per-state returns for ``agent`` with first-visit Monte Carlo.

    Episodes are played with random S/Z pieces until a negative reward is
    received. For every state first visited in an episode, the sum of all
    rewards received from that visit to the end of the episode is added to
    the state's running total. Sampling stops once ``noOfItems`` states have
    each collected ``dataPoints`` samples.

    Args:
        agent: Object providing ``reset()`` and
            ``getLocation(piece, gameBoard)``; the latter must return an
            indexable pair that is forwarded to ``TetrisGame.addPiece``.
        noOfItems: Number of states that must reach ``dataPoints`` samples.
        dataPoints: Number of episode samples required per state.
        rows: Number of board rows.
        cols: Number of board columns.
        init_states: List of board states to seed the evaluation with. The
            list is extended in place with the all-zero board (if missing)
            and with every new state discovered while playing.

    Returns:
        ``[stRewards, states]`` where ``stRewards`` maps ``str(state)`` to
        ``[total_return, sample_count]`` (totals, not averages) and
        ``states`` is the same list object as ``init_states``.
    """
    stRewards = {}
    states = init_states
    for x in states:
        stRewards[str(x)] = [0, 0]

    emptyBoard = [[0 for col in range(cols)] for row in range(rows)]
    if str(emptyBoard) not in stRewards:
        states.append(emptyBoard)
        # Register the board that was just appended. Keying on states[0]
        # here would leave the empty board without an entry whenever
        # init_states is non-empty, and the scan below would then raise
        # KeyError on reaching it.
        stRewards[str(emptyBoard)] = [0, 0]

    gameBoard = TetrisGame(rows, cols)
    count = 0   # states that have reached exactly dataPoints samples
    ind_st = 0  # index in states from which under-sampled states are sought
    while count < noOfItems:
        agent.reset()
        statesSeen = {}  # str(state) -> rewards received before first visit
        rewardSeen = []  # rewards of this episode, in order

        if len(states) < noOfItems:
            # Too few known states so far: start from the first state
            # (the empty board when init_states came from states_gen or
            # was empty) so that play discovers more.
            startState = states[0]
        else:
            # Enough states are known: start from the first state at or
            # after ind_st that still needs samples. Handling the
            # len(states) == noOfItems case here guarantees the board is
            # reset at the start of every episode.
            for j in range(ind_st, len(states)):
                if stRewards[str(states[j])][1] < dataPoints:
                    ind_st = j
                    break
            startState = states[ind_st]
        gameBoard.setNewState(startState)
        statesSeen[str(startState)] = 0

        while True:
            piece = generate_piece()
            loc = agent.getLocation(piece, gameBoard)
            reward = gameBoard.addPiece(piece, loc[0], loc[1])
            rewardSeen.append(reward)
            # A negative reward ends the episode.
            if reward < 0:
                break
            strState = str(gameBoard.getState())
            if strState not in statesSeen:
                # First visit in this episode. A state is appended to
                # states only here, so revisiting a brand-new state later
                # in the same episode cannot add a duplicate entry.
                if strState not in stRewards:
                    states.append(gameBoard.getStateCopy())
                statesSeen[strState] = len(rewardSeen)

        # returnsFromEnd[k] is the sum of the last k + 1 rewards, so
        # returnsFromEnd[-(1 + ind)] equals sum(rewardSeen[ind:]). The
        # episode loop always appends at least one reward, so the list is
        # never empty and every recorded ind is a valid index.
        returnsFromEnd = accumulate(list(reversed(rewardSeen)))
        for visitedSt, ind in statesSeen.items():
            episodeReturn = returnsFromEnd[-(1 + ind)]
            if visitedSt in stRewards:
                tup = stRewards[visitedSt]
                tup[0] = tup[0] + episodeReturn
                tup[1] = tup[1] + 1
                if tup[1] == dataPoints:
                    count = count + 1
            else:
                stRewards[visitedSt] = [episodeReturn, 1]
                if 1 == dataPoints:
                    count = count + 1
    return [stRewards, states]