-
Notifications
You must be signed in to change notification settings - Fork 0
/
darts.py
156 lines (125 loc) · 4.16 KB
/
darts.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#
# Darts playing model for CS181.
#
import sys
import time
import random
import throw
import mdp
import modelbased
import modelfree
import numpy as np
# Passed to mdp.start_game() and modelbased.modelbased() further down in this
# file; presumably the MDP discount factor -- confirm against those modules.
GAMMA = 0.5
# NOTE(review): not referenced anywhere in the visible part of this file.
EPOCH_SIZE = 10
# <CODE HERE>: Complete this function, which should return a
# list of all possible states.
def get_states():
    """Return every state as a list of integers.

    The states are the integers 0 through throw.START_SCORE inclusive
    (play() uses the same values as the remaining score of a game).

    The result is materialised with list() so that callers really get a
    list, as promised, even on interpreters where range() is lazy.
    """
    return list(range(throw.START_SCORE + 1))
# Returns a list of all possible actions, or targets, which include both a
# wedge number and a ring.
def get_actions():
    """Return the list of all targets a player can aim at.

    One throw.location is built for every (ring, wedge) combination.
    Targets are grouped by wedge, in the order of throw.wedges, and within
    a wedge the rings run CENTER, INNER_RING, FIRST_PATCH, MIDDLE_RING,
    SECOND_PATCH, OUTER_RING.
    """
    rings = (
        throw.CENTER,
        throw.INNER_RING,
        throw.FIRST_PATCH,
        throw.MIDDLE_RING,
        throw.SECOND_PATCH,
        throw.OUTER_RING,
    )
    actions = []
    for wedge in throw.wedges:
        # extend() grows the list in place instead of copying it on every
        # addition, which the repeated `actions = actions + [...]` did.
        actions.extend(throw.location(ring, wedge) for ring in rings)
    return actions
# <CODE HERE>: Define the reward function
def R(s, a):
    """Return the reward for taking action a in state s.

    Every state below s contributes the drop (s - s_prime) weighted by
    mdp.T(a, s, s_prime) -- presumably the probability of landing in
    s_prime; confirm against mdp.T.  The result is always a float and is
    0.0 when s is not positive.
    """
    expected = 0.0
    # Walk the possible drops from s down to 1, i.e. s_prime from 0 up to
    # s - 1, so the terms are accumulated in ascending s_prime order.
    for drop in range(s, 0, -1):
        landing = s - drop
        expected += mdp.T(a, s, landing) * drop
    return expected
def R_simple(s, a):
    """Return the reward for aiming at location a while in state s.

    The reward is the score of the targeted location itself, as given by
    throw.location_to_score, provided it does not exceed s; otherwise it
    is 0.
    """
    points = throw.location_to_score(a)
    return points if points <= s else 0
# Play a single game
def play(method):
    """Play one game of darts and return the number of throws it took.

    method selects the targeting policy: "mdp" uses the mdp module (started
    with GAMMA); any other value uses the modelfree module.

    The game starts at throw.START_SCORE.  After every throw the target,
    the location actually hit, its raw score and the score *before* the
    throw is applied are printed.  A throw whose raw score exceeds the
    current score leaves the score unchanged ("TOO HIGH!").  The game ends
    when the score reaches exactly 0.
    """
    score = throw.START_SCORE
    turns = 0

    # Pick the policy module once; both expose get_target(score).
    planner = mdp if method == "mdp" else modelfree
    if planner is mdp:
        target = mdp.start_game(GAMMA)
    else:
        target = modelfree.start_game()

    while True:
        turns += 1
        result = throw.throw(target)
        raw_score = throw.location_to_score(result)

        # Each message is printed as one pre-formatted string so the output
        # is the same whether print is a statement or a function.
        print("Target: wedge %s , ring %s" % (target.wedge, target.ring))
        print("Result: wedge %s , ring %s" % (result.wedge, result.ring))
        print("Raw Score: %s" % (raw_score,))
        print("Score: %s" % (score,))

        if raw_score <= score:
            score = int(score - raw_score)
        else:
            # Overshoot: the throw does not count towards the score.
            print("")
            print("TOO HIGH!")

        if score == 0:
            break

        target = planner.get_target(score)

    print("WOOHOO! It only took %s  turns" % (turns,))
    return turns
# Play n games, print the average number of turns, and return the total number of turns.
def test(n, method):
    """Play n games with the given method and report the average length.

    The average number of turns per game is printed.  The value returned
    is the *total* number of turns over all n games, not the average.

    n must be non-zero: the average is computed as total / n, so n == 0
    raises ZeroDivisionError.
    """
    total_turns = 0
    for _ in range(n):
        total_turns += play(method)
    # One pre-formatted string keeps the output identical whether print is a
    # statement or a function.
    print("Average turns =  %s" % (float(total_turns) / float(n),))
    return total_turns
# <CODE HERE>: Feel free to modify the main function to set up your experiments.
def main():
    """Run the darts experiments that are currently enabled.

    Two experiments are active: 100 MDP games with the simple thrower, then
    a model-based run with the complex thrower whose result is written to
    q4a_data_strat1.csv.  The model-free experiment at the end is disabled.
    """
    throw.init_board()

    #*************************************************#
    # MDP experiment, using the simple dart thrower   #
    # that matches the thrower specified in           #
    # question 2. Solves the MDP and plays 100 games. #
    #*************************************************#
    throw.use_simple_thrower()
    test(100, "mdp")

    #*************************************************#
    # Model-based experiment using the complex dart   #
    # thrower.                                        #
    #*************************************************#
    # Seed the random number generator -- the default is
    # the current system time. A fixed seed keeps the dart
    # thrower constant across multiple calls to main().
    # Then, initialize the throwing model and run
    # the modelbased algorithm.
    random.seed(181)
    throw.init_thrower()

    # The with-block guarantees the CSV is flushed and closed, even if the
    # model-based run raises part-way through.
    with open("q4a_data_strat1.csv", "w") as f:
        f.write("EPOCH_SIZE, AVG_TURNS\n")
        avg_turns = modelbased.modelbased(GAMMA, 5, 100)
        # NOTE(review): the EPOCH_SIZE column is written as 1 while
        # modelbased.modelbased is called with (GAMMA, 5, 100) -- confirm
        # which argument is the epoch size and whether this value should
        # match it.
        f.write("{0}, {1}\n".format(1, avg_turns))

    #*************************************************#
    # Uncomment the lines below to run the modelfree  #
    # code using the complex dart thrower.            #
    #*************************************************#
    # Plays games using a default player. No modelfree
    # code is provided.
    #random.seed(181)
    #throw.init_thrower()
    #test(100, "modelfree")
# Run the experiments only when this file is executed as a script, not
# when it is imported by another module.
if __name__ == "__main__":
    main()