-
Notifications
You must be signed in to change notification settings - Fork 0
/
threads.py
143 lines (106 loc) · 6.01 KB
/
threads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from agent import Agent
from learner import Learner
from replay_memory import ReplayMemory
import environment
import displays
import time
from multiprocessing import Process, Queue, Manager
def thread_agent(config, road_environment, memory_queue, environment_queue, agent_performance_queue, weights, lock_weights):
    """Process target: build the training agent and run its train loop.

    The agent pushes transitions to memory_queue, rendered environments to
    environment_queue, and metrics to agent_performance_queue; weights are
    read from the Manager-shared dict under lock_weights.
    """
    training_agent = Agent(config, road_environment, environment_queue, agent_performance_queue, memory_queue, 'agent_1')
    training_agent.train(weights, lock_weights)
def thread_learner(config, batch_queue, learner_performance_queue, update_p_queue, weights, lock_weights):
    """Process target: build the learner and run its learn loop.

    Consumes batches from batch_queue, publishes metrics to
    learner_performance_queue and priority updates to update_p_queue, and
    writes updated network weights into the shared dict under lock_weights.
    """
    network_learner = Learner(config, batch_queue, learner_performance_queue, update_p_queue)
    network_learner.learn(weights, lock_weights)
def thread_memory(config, memory_queue, batch_queue, update_p_queue, priority_environment_queue):
    """Process target: run the replay-memory service loop.

    Receives transitions on memory_queue, serves training batches on
    batch_queue, and applies priority updates arriving on update_p_queue.
    """
    replay_buffer = ReplayMemory(config, memory_queue, batch_queue, update_p_queue, priority_environment_queue)
    replay_buffer.loop()
def thread_agent_test(config, test_environment, test_environment_queue, test_agent_performance_queue, weights, lock_weights, memory_queue):
    """Process target: build an evaluation agent and run its test loop.

    NOTE(review): test_environment_queue is passed both as the environment
    queue and as the trailing extra argument — this mirrors the call shape
    in thread_agent_priority and is preserved as-is.
    """
    evaluation_agent = Agent(config, test_environment, test_environment_queue, test_agent_performance_queue, memory_queue, 'agent_test', test_environment_queue)
    evaluation_agent.test(weights, lock_weights)
def thread_agent_priority(config, test_environment, priority_environment_queue, memory_queue, test_environment_queue, weights, lock_weights):
    """Process target: build the priority agent and generate prioritized memories.

    This agent has no performance queue (None) and feeds transitions it deems
    important into memory_queue.
    """
    priority_agent = Agent(config, test_environment, priority_environment_queue, None, memory_queue, 'agent_priority', test_environment_queue)
    priority_agent.create_prioritized_memories(weights, lock_weights)
def thread_game_display(road_environment, environment_queue):
    """Process target: render every training-environment state pulled from the queue.

    Runs forever; the parent terminates this process when the run ends.
    """
    renderer = displays.GameDisplay(road_environment)
    while True:
        # environment_queue.qsize() can be printed here to check whether the
        # display keeps up with the producer.
        latest_state = environment_queue.get()
        renderer.render(latest_state)
def thread_test_display(test_environment, test_environment_queue):
    """Process target: render every test-environment state pulled from the queue.

    Runs forever; the parent terminates this process when the run ends.
    """
    renderer = displays.GameDisplay(test_environment)
    while True:
        latest_state = test_environment_queue.get()
        renderer.render(latest_state)
def one_run(config):
    """Run one full distributed training session.

    Spawns the training agent, learner, replay memory, test agent and
    priority agent as separate processes wired together with queues and a
    Manager-shared weights dict. The parent process polls the three
    performance queues, optionally renders metric displays, and stops once
    the training agent has produced more than ``config.max_step`` metric
    points.

    Parameters:
        config: project configuration object; must expose at least
            ``display`` (bool) and ``max_step`` (int).

    Returns:
        8-tuple of series:
        (agent_x, agent_y, learner_x, learner_loss, learner_x, learner_y2,
         test_x, test_y). The learner x-series is intentionally returned
        twice, once per learner metric.
    """
    test_environment = environment.EnvRoad(config)
    road_environment = environment.EnvRoad(config)
    memory_queue = Queue()
    update_p_queue = Queue()
    batch_queue = Queue()
    environment_queue = Queue()
    test_environment_queue = Queue()
    agent_performance_queue = Queue()
    learner_performance_queue = Queue()
    test_agent_performance_queue = Queue()
    priority_environment_queue = Queue()
    manager_weights = Manager()
    weights = manager_weights.dict()
    lock_weights = manager_weights.Lock()
    p_agent = Process(target=thread_agent, args=(config, road_environment, memory_queue, environment_queue, agent_performance_queue, weights, lock_weights))
    p_learn = Process(target=thread_learner, args=(config, batch_queue, learner_performance_queue, update_p_queue, weights, lock_weights))
    p_mem = Process(target=thread_memory, args=(config, memory_queue, batch_queue, update_p_queue, priority_environment_queue))
    p_agent_test = Process(target=thread_agent_test, args=(config, test_environment, test_environment_queue, test_agent_performance_queue, weights, lock_weights, memory_queue))
    p_agent_priority = Process(target=thread_agent_priority, args=(config, test_environment, priority_environment_queue, memory_queue, test_environment_queue, weights, lock_weights))
    start_time = time.time()
    p_agent.start()
    p_mem.start()
    p_learn.start()
    p_agent_test.start()
    p_agent_priority.start()
    if config.display:
        p_display_game = Process(target=thread_game_display, args=(road_environment, environment_queue))
        p_display_test = Process(target=thread_test_display, args=(test_environment, test_environment_queue))
        p_display_game.start()
        p_display_test.start()
        display_agent = displays.AgentDisplays(config)
        display_learner = displays.LearnerDisplays(config)
        display_test = displays.TestDisplays(config)
    learner_performance = [[], [], []]
    agent_performance = [[], []]
    test_agent_performance = [[], []]
    end = False
    # Poll all performance queues and render metrics until the agent has
    # produced more than config.max_step data points.
    while True:
        # Use empty() rather than qsize(): multiprocessing.Queue.qsize()
        # raises NotImplementedError on macOS.
        while not agent_performance_queue.empty():
            agent_performance = agent_performance_queue.get()
            # Only render when the x and y series are consistent lengths.
            if len(agent_performance[0]) == len(agent_performance[1]) and config.display:
                display_agent.render(agent_performance[0], agent_performance[1])
            if len(agent_performance[0]) > config.max_step:
                end = True
        while not learner_performance_queue.empty():
            learner_performance = learner_performance_queue.get()
            if len(learner_performance[0]) == len(learner_performance[1]) == len(learner_performance[2]) and config.display:
                display_learner.render(learner_performance[0], learner_performance[1], learner_performance[2])
        while not test_agent_performance_queue.empty():
            test_agent_performance = test_agent_performance_queue.get()
            if len(test_agent_performance[0]) == len(test_agent_performance[1]) and config.display:
                display_test.render(test_agent_performance[0], test_agent_performance[1])
        if end:
            if config.display:
                displays.close_all()
                p_display_game.terminate()
                p_display_test.terminate()
            break
        # Yield the CPU briefly instead of busy-spinning while the queues
        # are empty.
        time.sleep(0.01)
    # Terminate then join each worker so children are reaped (no zombies).
    for worker in (p_agent, p_mem, p_learn, p_agent_test, p_agent_priority):
        worker.terminate()
        worker.join()
    print('Average time per step: ', str((time.time() - start_time) / config.max_step))
    return agent_performance[0], agent_performance[1], learner_performance[0], learner_performance[1], \
        learner_performance[0], learner_performance[2], test_agent_performance[0], test_agent_performance[1]