/
train_runner.py
48 lines (36 loc) · 1.59 KB
/
train_runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import subprocess
from datetime import datetime
import portpicker
from config_generator import ConfigGenerator
from summaries_reader import SummariesReader
class TrainRunner(object):
    """Run ``learn.py`` training processes for one environment.

    Each call to :meth:`f` generates a trainer configuration, launches
    ``learn.py`` as a child process, echoes its output and returns a reward
    value read back through ``SummariesReader``.
    """

    def __init__(self, env_name):
        """Remember the environment name and create the config generator.

        Args:
            env_name: Environment name; passed to ``learn.py`` and used as
                the prefix of every generated run id.
        """
        self.env_name = env_name
        self.conf_gen = ConfigGenerator()

    def f(self, params):
        """
        Function to optimize.

        Runs a training process and waits for its termination, echoing every
        line the process writes (stdout and stderr merged) prefixed with its
        pid.

        Args:
            params: Forwarded unchanged to ``ConfigGenerator.generate``.

        Returns:
            The ``.value`` of the last entry returned by
            ``SummariesReader(run_id).get_scalar('Info/cumulative_reward')``.
        """
        # Microsecond-resolution timestamp keeps run ids distinct between calls.
        run_id = self.env_name + '_' + datetime.now().strftime("%Y-%m-%d_%H-%M-%S.%f")
        conf_path = self.conf_gen.generate(self.env_name, params, run_id, params_dict_format=False)
        proc = self.start_train_process(conf_path, run_id)
        # Close the pipe deterministically instead of relying on garbage
        # collection of the Popen object.
        with proc.stdout:
            # readline() returns b'' only at EOF, i.e. once the child has
            # closed its end of the pipe.
            for line in iter(proc.stdout.readline, b''):
                print(self._format_output_line(proc.pid, line), end='')
        # NOTE(review): the exit status is not inspected; a failed run is only
        # noticed if reading the summaries below fails.
        proc.wait()
        reward = SummariesReader(run_id).get_scalar('Info/cumulative_reward')[-1].value
        return reward

    def start_train_process(self, conf_path, run_id, options=None):
        """Start ``learn.py`` in training mode and return the process handle.

        Args:
            conf_path: Trainer configuration path (converted with ``str``).
            run_id: Run identifier passed as ``--run-id``.
            options: Optional extra command-line argument(s): either a single
                string, or an iterable of arguments that are appended one by
                one.

        Returns:
            The ``subprocess.Popen`` object, with stderr merged into a piped
            stdout.
        """
        # A free port number is used as the worker id.
        unused_port = portpicker.pick_unused_port()
        command = self._build_command(conf_path, run_id, unused_port, options)
        proc = subprocess.Popen(command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        return proc

    def _build_command(self, conf_path, run_id, worker_id, options=None):
        """Return the argv list used to launch ``learn.py`` for this run."""
        command = ['python', 'learn.py', self.env_name, '--train',
                   '--worker-id=' + str(worker_id),
                   '--trainer-config-path=' + str(conf_path),
                   '--run-id=' + run_id]
        if options:
            if isinstance(options, (str, bytes)):
                # Single argument: same behaviour as before.
                command.append(options)
            else:
                # A list/tuple of arguments must be flattened; appending it
                # as-is would nest a list inside argv, which Popen rejects.
                command.extend(str(opt) for opt in options)
        return command

    @staticmethod
    def _format_output_line(pid, raw_line):
        """Return ``raw_line`` (bytes) decoded and prefixed with ``[pid] ``."""
        # errors='replace' keeps one undecodable byte in the child's output
        # from aborting the whole run with UnicodeDecodeError.
        return '[{0}] {1}'.format(pid, raw_line.decode('utf-8', errors='replace'))
if __name__ == '__main__':
    # Manual smoke run: train the 'test123' environment once with a fixed
    # parameter set and print the resulting reward.
    print(TrainRunner('test123').f([[1, 1]]))