def test_actor_task(self):
  actor_id = str(uuid.uuid1())
  learner_id = str(uuid.uuid1())
  league_client = LeagueMgrAPIs(league_mgr_addr="localhost:11007")
  learner_task = league_client.request_learner_task(learner_id=learner_id)
  league_client.notify_learner_task_begin(learner_id=learner_id,
                                          learner_task=learner_task)
  model_client = ModelPoolAPIs(model_pool_addrs=["localhost:11001:11006"])
  hyperparam = MutableHyperparam()
  model_client.push_model(None, hyperparam, str(uuid.uuid1()))
  task = league_client.request_actor_task(actor_id=actor_id,
                                          learner_id=learner_id)
  self.assertTrue(isinstance(task, ActorTask))
  league_client.notify_actor_task_begin(actor_id=actor_id)
  league_client.notify_actor_task_end(
      actor_id=actor_id,
      match_result=MatchResult(task.model_key1, task.model_key2, 1))

def test_pull_hyperparam(self):
  client = ModelPoolAPIs(model_pool_addrs=[
      "localhost:11001:11006", "localhost:11002:11007"
  ])
  key1 = str(uuid.uuid1())
  key2 = str(uuid.uuid1())
  client.push_model(None, "any_hyperparam_object", key1)
  client.push_model(None, "any_hyperparam_object", key2)
  client.push_model(None, "updated_hyperparam_object", key2)
  hyperparam1 = client.pull_attr('hyperparam', key1)
  self.assertEqual(hyperparam1, "any_hyperparam_object")
  hyperparam2 = client.pull_attr('hyperparam', key2)
  self.assertEqual(hyperparam2, "updated_hyperparam_object")

def test_pull_model(self):
  client = ModelPoolAPIs(model_pool_addrs=[
      "localhost:11001:11006", "localhost:11002:11007"
  ])
  key1 = str(uuid.uuid1())
  key2 = str(uuid.uuid1())
  client.push_model("any_model_object", None, key1)
  client.push_model("any_model_object", None, key2)
  client.push_model("updated_model_object", None, key2)
  model1 = client.pull_model(key1)
  self.assertEqual(model1.model, "any_model_object")
  model2 = client.pull_model(key2)
  self.assertEqual(model2.model, "updated_model_object")

def test_pull_keys(self):
  client = ModelPoolAPIs(model_pool_addrs=[
      "localhost:11001:11006", "localhost:11002:11007"
  ])
  key1 = str(uuid.uuid1())
  key2 = str(uuid.uuid1())
  client.push_model(None, None, key1)
  client.push_model(None, None, key2)
  client.push_model(None, None, key1)
  saved_keys = client.pull_keys()
  self.assertEqual(len(saved_keys), 2)
  self.assertTrue(key1 in saved_keys)
  self.assertTrue(key2 in saved_keys)

def main(_):
  model_pool_apis = ModelPoolAPIs(FLAGS.model_pool_addrs.split(','))
  keys = model_pool_apis.pull_keys()
  for key, model_path in zip(FLAGS.model_key, FLAGS.model_path):
    if key in keys:
      m = model_pool_apis.pull_model(key)
      with open(model_path, 'rb') as f:
        model = pickle.load(f)
      if isinstance(model, Model):
        model = model.model
      model_pool_apis.push_model(model, m.hyperparam, m.key, m.createtime,
                                 m.freezetime, m.updatetime)

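# The FLAGS referenced above are assumed to be absl flags defined elsewhere
# in this script. A plausible sketch of those definitions (the names match
# the usage in main; the types and defaults here are guesses):
from absl import app, flags

flags.DEFINE_string('model_pool_addrs', 'localhost:10003:10004',
                    'comma-separated model pool addresses')
flags.DEFINE_multi_string('model_key', [], 'keys of the models to overwrite')
flags.DEFINE_multi_string('model_path', [],
                          'local pickle file per key, aligned with model_key')
FLAGS = flags.FLAGS

if __name__ == '__main__':
  app.run(main)
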
def __init__(self, league_mgr_addr, model_pool_addrs, learner_addr=None,
             verbose=0, log_interval_steps=51):
  ip, hostname = get_ip_hostname()
  self._actor_id = hostname + '@' + ip + ':' + str(uuid.uuid1())[:8]
  self._learner_id = None
  self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
  self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
  if learner_addr:
    self._learner_apis = LearnerAPIs(learner_addr)
    self._learner_id = self._learner_apis.request_learner_id()
  self._log_interval_steps = log_interval_steps
  logger.configure(dir=None, format_strs=['stdout'])
  logger.set_level(verbose)
  self.task = None
  self._steps = 0

def test1():
  from tleague.model_pools.model_pool import ModelPool
  from tleague.model_pools.model_pool_apis import ModelPoolAPIs
  from multiprocessing import Process
  server_process = Process(
      target=lambda: ModelPool(ports="11001:11006").run())
  server_process.start()
  model_pool_apis = ModelPoolAPIs(["localhost:11001:11006"])
  model_pool_apis.push_model('model1', None, 'model1')
  saver = ChkptsFromModelPool(model_pool_apis)
  saver._save_model_checkpoint('./', 'test')
  model_pool_apis.push_model('Modified_model1', None, 'model1')
  saver._restore_model_checkpoint('./test')
  model = model_pool_apis.pull_model('model1')
  server_process.terminate()

class InfServer(object):

  def __init__(self, league_mgr_addr, model_pool_addrs, port, ds, batch_size,
               ob_space, ac_space, policy, outputs=['a'], policy_config={},
               gpu_id=0, compress=True, batch_worker_num=4,
               update_model_seconds=60, learner_id=None, log_seconds=60,
               model_key="", task_attr='model_key', **kwargs):
    self._update_model_seconds = update_model_seconds
    self._log_seconds = log_seconds
    self._learner_id = learner_id
    self._task_attr = task_attr.split('.')
    if model_key:
      # If model_key is given, the infserver performs inference for a
      # fixed model.
      self._league_mgr_apis = None
      self.is_rl = False
      self.model_key = model_key
    else:
      # If model_key is absent, the infserver performs varying-policy
      # inference; model_key is assigned by querying the league mgr.
      self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
      self.is_rl = True
      self.model_key = None
    self.model = None
    self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
    assert hasattr(policy, 'net_config_cls')
    assert hasattr(policy, 'net_build_fun')
    # bookkeeping
    self.ob_space = ob_space
    self.ac_space = ac_space
    self.batch_size = batch_size
    self._ac_structure = tp_utils.template_structure_from_gym_space(ac_space)
    self.outputs = outputs
    # build the net
    policy_config = {} if policy_config is None else policy_config
    policy_config['batch_size'] = batch_size
    use_gpu = (gpu_id >= 0)
    self.data_server = InferDataServer(
        port=port,
        batch_size=batch_size,
        ds=ds,
        batch_worker_num=batch_worker_num,
        use_gpu=use_gpu,
        compress=compress,
    )
    config = tf.ConfigProto(allow_soft_placement=True)
    if use_gpu:
      config.gpu_options.visible_device_list = str(gpu_id)
      config.gpu_options.allow_growth = True
      if 'use_xla' in policy_config and policy_config['use_xla']:
        config.graph_options.optimizer_options.global_jit_level = (
            tf.OptimizerOptions.ON_1)
    self._sess = tf.Session(config=config)
    self.nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
    self.net_out = policy.net_build_fun(self.data_server._batch_input,
                                        self.nc, scope='Inf_server')
    # saving/loading ops
    self.params = self.net_out.vars.all_vars
    self.params_ph = [
        tf.placeholder(p.dtype, shape=p.get_shape()) for p in self.params
    ]
    self.params_assign_ops = [
        p.assign(np_p) for p, np_p in zip(self.params, self.params_ph)
    ]
    # initialize the net params
    tf.global_variables_initializer().run(session=self._sess)
    self.setup_fetches(outputs)
    self.id_and_fetches = [self.data_server._batch_data_id, self.fetches]
    self._update_model()

  def load_model(self, loaded_params):
    self._sess.run(
        self.params_assign_ops[:len(loaded_params)],
        feed_dict={p: v for p, v in zip(self.params_ph, loaded_params)})

  def setup_fetches(self, outputs):
    def split_batch(template, tf_structure):
      split_flatten = zip(*[
          tf.split(t, self.batch_size)
          for t in nest.flatten_up_to(template, tf_structure)
      ])
      return [
          nest.pack_sequence_as(template, flatten)
          for flatten in split_flatten
      ]

    if self.nc.use_self_fed_heads:
      a = nest.map_structure_up_to(self._ac_structure,
                                   lambda head: head.sam,
                                   self.net_out.self_fed_heads)
      neglogp = nest.map_structure_up_to(self._ac_structure,
                                         lambda head: head.neglogp,
                                         self.net_out.self_fed_heads)
      flatparam = nest.map_structure_up_to(self._ac_structure,
                                           lambda head: head.flatparam,
                                           self.net_out.self_fed_heads)
      self.all_outputs = {
          'a': split_batch(self._ac_structure, a),
          'neglogp': split_batch(self._ac_structure, neglogp),
          'flatparam': split_batch(self._ac_structure, flatparam),
          'v': (tf.split(self.net_out.value_head, self.batch_size)
                if self.net_out.value_head is not None
                else [[]] * self.batch_size),
          'state': (tf.split(self.net_out.S, self.batch_size)
                    if self.net_out.S is not None
                    else [[]] * self.batch_size)
      }
    else:
      flatparam = nest.map_structure_up_to(self._ac_structure,
                                           lambda head: head.flatparam,
                                           self.net_out.outer_fed_heads)
      self.all_outputs = {
          'flatparam': split_batch(self._ac_structure, flatparam),
          'state': (tf.split(self.net_out.S, self.batch_size)
                    if self.net_out.S is not None
                    else [[]] * self.batch_size)
      }
    if self.nc.use_lstm and 'state' not in outputs:
      outputs.append('state')
    self.fetches = [
        dict(zip(outputs, pred))
        for pred in zip(*[self.all_outputs[o] for o in outputs])
    ]

  def _update_model(self):
    if self.is_rl:
      # if (self.model_key is None or
      #     (self.model is not None and self.model.is_freezed())):
      self._query_task()
    if self._should_update_model(self.model, self.model_key):
      self.model = self._model_pool_apis.pull_model(self.model_key)
      self.load_model(self.model.model)

  def _query_task(self):
    assert self.is_rl, '_query_task can only be used in RL!'
    task = self._league_mgr_apis.query_learner_task(self._learner_id)
    while task is None:
      print('Learner has not requested a task yet, waiting...')
      time.sleep(5)
      task = self._league_mgr_apis.query_learner_task(self._learner_id)
    self.last_model_key = self.model_key
    self.model_key = task
    for attr in self._task_attr:
      self.model_key = getattr(self.model_key, attr)
    return task

  def _should_update_model(self, model, model_key):
    if model is None or model_key != model.key:
      return True
    elif model.is_freezed():
      return False
    else:
      return self._model_pool_apis.pull_attr(
          'updatetime', model_key) > model.updatetime

  def run(self):
    while not self.data_server.ready:
      time.sleep(10)
      print('Waiting for at least {} actors to '
            'connect ...'.format(self.batch_size), flush=True)
    last_update_time = time.time()
    last_log_time = last_update_time
    batch_num = 0
    last_log_batch_num = 0
    pid = os.getpid()
    while True:
      # input is pre-fetched in self.data_server
      data_ids, outputs = self._sess.run(self.id_and_fetches, {})
      self.data_server.response(data_ids, outputs)
      batch_num += 1
      t0 = time.time()
      if t0 > last_update_time + self._update_model_seconds:
        self._update_model()
        last_update_time = t0
      t0 = time.time()
      if t0 > last_log_time + self._log_seconds:
        cost = t0 - last_log_time
        sam_num = self.batch_size * (batch_num - last_log_batch_num)
        print('Process {} predicted {} samples in {} seconds, fps {}'.format(
            pid, sam_num, cost, sam_num / cost), flush=True)
        last_log_batch_num = batch_num
        last_log_time = t0

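# A minimal usage sketch (not from the source): stand up an InfServer for
# fixed-model inference. `my_policy`, `ds`, `ob_space` and `ac_space` are
# assumed to be provided by the caller; the addresses and port are
# placeholders.
server = InfServer(
    league_mgr_addr=None,  # unused when model_key is fixed
    model_pool_addrs=['localhost:10003:10004'],
    port=10007,
    ds=ds,
    batch_size=32,
    ob_space=ob_space,
    ac_space=ac_space,
    policy=my_policy,  # module exposing net_config_cls / net_build_fun
    model_key='IL-model',  # fixed key; omit to query the league mgr instead
)
server.run()  # blocks: batches requests, predicts, responds
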
def __init__(self, ports, gpu_id, replay_filelist, batch_size,
             min_train_sample_num, min_val_sample_num, rm_size,
             learning_rate, print_interval, checkpoint_interval,
             num_val_batches, replay_converter_type, policy, policy_config,
             converter_config=None, policy_config_type=None,
             model_pool_addrs=None, rollout_length=1, checkpoints_dir=None,
             restore_checkpoint_path=None, train_generator_worker_num=4,
             val_generator_worker_num=2, pull_worker_num=2,
             num_sgd_updates=int(1e30), repeat_training_task=False,
             unroll_length=32, pub_interval=50, max_clip_grad_norm=1,
             after_loading_init_scope=None, use_mixed_precision=False,
             use_sparse_as_dense=False, enable_validation=True,
             post_process_data=None):
  assert len(ports) == 2
  self.use_hvd = has_hvd and hvd.size() > 1
  self.rank = 0 if not self.use_hvd else hvd.rank()
  self.model_key = 'IL-model'
  self.pub_interval = pub_interval
  self.rnn = (False if 'use_lstm' not in policy_config
              else policy_config['use_lstm'])
  self.hs_len = None
  # overwrite policy_config with the batch_size used for training
  policy_config['batch_size'] = batch_size
  if self.rnn:
    assert model_pool_addrs is not None
    self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
    self._model_pool_apis.check_server_set_up()
    policy_config['rollout_len'] = rollout_length
    # infer hidden state length (size)
    if 'hs_len' in policy_config:
      self.hs_len = policy_config['hs_len']
    elif 'nlstm' in policy_config:
      self.hs_len = 2 * policy_config['nlstm']
    else:
      self.hs_len = 128
  self.should_push_model = (self.rnn and self.rank == 0)
  use_gpu = (gpu_id >= 0)
  converter_config = {} if converter_config is None else converter_config
  train_replay_filelist, val_replay_filelist = _get_local_replays(
      replay_filelist)
  replay_converter = replay_converter_type(**converter_config)
  ob_space, ac_space = replay_converter.space.spaces
  if post_process_data is not None:
    ob_space, ac_space = post_process_data(ob_space, ac_space)
  self.data_pool = ImDataServer(
      ports=ports,
      train_replay_filelist=train_replay_filelist,
      val_replay_filelist=val_replay_filelist,
      batch_size=batch_size,
      min_train_sample_num=min_train_sample_num,
      min_val_sample_num=min_val_sample_num,
      ob_space=ob_space,
      ac_space=ac_space,
      train_generator_worker_num=train_generator_worker_num,
      val_generator_worker_num=val_generator_worker_num,
      pull_worker_num=pull_worker_num,
      rm_size=rm_size,
      repeat_training_task=repeat_training_task,
      unroll_length=unroll_length,
      rollout_length=rollout_length,
      lstm=self.rnn,
      hs_len=self.hs_len,
      use_gpu=use_gpu)
  self._enable_validation = enable_validation
  config = tf.ConfigProto(allow_soft_placement=True)
  if use_gpu:
    config.gpu_options.visible_device_list = str(gpu_id)
    config.gpu_options.allow_growth = True
  self._sess = tf.Session(config=config)
  net_config = policy_config_type(ob_space, ac_space, **policy_config)
  net_config_val = deepcopy(net_config)
  # capture the variable scope so create_policy can share weights
  with tf.variable_scope('model', reuse=tf.AUTO_REUSE) as model_scope:
    pass

  def create_policy(inputs, nc):
    return policy(inputs=inputs, nc=nc, scope=model_scope)

  if hasattr(net_config, 'endpoints_verbosity'):
    # intentionally disable endpoints during training
    net_config.endpoints_verbosity = 0
  device = '/gpu:0' if use_gpu else '/cpu:0'
  with tf.device(device):
    if 'use_xla' in policy_config and policy_config['use_xla']:
      try:
        # use tensorflow's accelerated linear algebra compile method
        with tf.xla.experimental.jit_scope(True):
          model = create_policy(self.data_pool.train_batch_input, net_config)
      except:
        logger.log("WARNING: using tf.xla requires tf version >= 1.15.")
        model = create_policy(self.data_pool.train_batch_input, net_config)
    else:
      model = create_policy(self.data_pool.train_batch_input, net_config)
    model_val = create_policy(self.data_pool.val_batch_input, net_config_val)
  params = tf.trainable_variables(scope='model')
  param_norm = tf.global_norm(params)
  optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate,
                                     epsilon=1e-5)
  if use_mixed_precision:
    try:
      optimizer = (
          tf.compat.v1.train.experimental
          .enable_mixed_precision_graph_rewrite(optimizer))
    except:
      logger.warn("using tf mixed_precision requires tf version >= 1.15.")
  if self.use_hvd:
    optimizer = hvd.DistributedOptimizer(
        optimizer, sparse_as_dense=use_sparse_as_dense)
    barrier_op = hvd.allreduce(tf.Variable(0.))
    self.barrier = lambda: self._sess.run(barrier_op)
  train_loss = tf.reduce_mean(model.loss.total_il_loss *
                              self.data_pool.train_batch_weight)
  val_loss = tf.reduce_mean(model_val.loss.total_il_loss *
                            self.data_pool.val_batch_weight)
  if hasattr(net_config, 'weight_decay') and not net_config.weight_decay:
    # weight_decay is None or 0.0
    total_loss = train_loss
  else:
    total_loss = train_loss + model.loss.total_reg_loss
  grads_and_vars = optimizer.compute_gradients(total_loss, params)
  clip_vars = model.vars.lstm_vars
  clip_grads = [grad for grad, var in grads_and_vars if var in clip_vars]
  nonclip_grads_and_vars = [(grad, var) for grad, var in grads_and_vars
                            if var not in clip_vars]
  if max_clip_grad_norm > 0:
    clip_grads, clip_grad_norm = tf.clip_by_global_norm(
        clip_grads, max_clip_grad_norm)
  else:
    clip_grad_norm = tf.global_norm(clip_grads)
  clip_grads_and_var = list(zip(clip_grads, clip_vars))
  grads_and_vars = clip_grads_and_var + nonclip_grads_and_vars
  grad_norm = tf.global_norm(list(zip(*grads_and_vars))[0])
  train_op = optimizer.apply_gradients(grads_and_vars)
  tf.global_variables_initializer().run(session=self._sess)
  self.new_params = [
      tf.placeholder(p.dtype, shape=p.get_shape()) for p in params
  ]
  self.param_assign_ops = [
      p.assign(new_p) for p, new_p in zip(params, self.new_params)
  ]
  opt_params = optimizer.variables()
  self.new_opt_params = [
      tf.placeholder(p.dtype, shape=p.get_shape()) for p in opt_params
  ]
  self.opt_param_assign_ops = [
      p.assign(new_p) for p, new_p in zip(opt_params, self.new_opt_params)
  ]

  def read_params():
    return self._sess.run(params)

  def read_opt_params():
    return self._sess.run(opt_params)

  def load_model(np_new_params):
    self._sess.run(
        self.param_assign_ops,
        feed_dict={p: np_p
                   for p, np_p in zip(self.new_params, np_new_params)})

  def restore_optimizer(np_new_opt_params):
    self._sess.run(
        self.opt_param_assign_ops,
        feed_dict={p: np_p
                   for p, np_p in zip(self.new_opt_params,
                                      np_new_opt_params)})

  def _train_step():
    return self._sess.run([
        train_loss_aggregated, *train_other_losses_aggregated, grad_norm,
        clip_grad_norm, param_norm, train_op
    ], {})[:-1]

  def _val_step():
    # maximal_feat = [tf.reduce_max(tf.cast(x, tf.float32))
    #                 for x in self.data_pool.val_batch_input.X]
    # print(self._sess.run(maximal_feat, {}))
    return self._sess.run([
        val_loss_aggregated, *val_other_losses_aggregated,
        *endpoints_aggregated
    ], {})

  self._saver = ChkptsFromSelf(read_params, load_model, self.model_key)
  if restore_checkpoint_path is not None:
    self._saver._restore_model_checkpoint(restore_checkpoint_path)
    if after_loading_init_scope is not None:
      var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope=after_loading_init_scope)
      logger.log('perform after-loading init for vars')
      for v in var_list:
        logger.log(v)
      tf.variables_initializer(var_list).run(session=self._sess)
  if self.use_hvd:
    hvd.broadcast_global_variables(0).run(session=self._sess)
  _allreduce = lambda x: x if not self.use_hvd else hvd.allreduce(x)
  train_loss_aggregated = _allreduce(train_loss)
  train_other_loss_names = model.loss.loss_endpoints.keys()
  train_other_losses_aggregated = [
      _allreduce(tf.reduce_mean(l * self.data_pool.train_batch_weight))
      for l in model.loss.loss_endpoints.values()
  ]
  val_loss_aggregated = _allreduce(val_loss)
  val_other_loss_names = model_val.loss.loss_endpoints.keys()
  val_other_losses_aggregated = [
      _allreduce(tf.reduce_mean(l * self.data_pool.val_batch_weight))
      for l in model_val.loss.loss_endpoints.values()
  ]
  endpoints_names = model_val.endpoints.keys()
  endpoints_aggregated = [
      _allreduce(tf.reduce_mean(l)) for l in model_val.endpoints.values()
  ]
  self._sess.graph.finalize()
  self._total_samples = lambda: [
      self.data_pool._num_train_samples, self.data_pool._num_val_samples
  ]
  self._train_log_names = (['loss'] + list(train_other_loss_names) +
                           ['grad_norm', 'clip_grad_norm', 'param_norm'])
  self._val_log_names = (['loss'] + list(val_other_loss_names) +
                         list(endpoints_names))
  self._batch_size = batch_size
  self._train_step = _train_step
  self._val_step = _val_step
  self._print_interval = print_interval
  self._checkpoint_interval = checkpoint_interval
  self._num_val_batches = num_val_batches
  self._checkpoints_dir = checkpoints_dir if self.rank == 0 else None
  self._num_sgd_updates = num_sgd_updates
  self.load_model = load_model
  self.restore_optimizer = restore_optimizer
  self.read_params = read_params
  self.read_opt_params = read_opt_params
  format_strs = ['log', 'tensorboard', 'csv']
  logger.configure(dir='training_log/rank{}'.format(self.rank),
                   format_strs=['stdout'] + format_strs)
  with logger.scoped_configure(
      dir='validation_log/rank{}'.format(self.rank),
      format_strs=['stderr'] + format_strs):
    self.val_logger = logger.Logger.CURRENT

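# The `_allreduce` lambda above follows a common Horovod idiom: aggregate a
# metric across workers only when distributed training is active, and pass
# it through unchanged otherwise. A self-contained sketch of the same
# pattern (assumes horovod is an optional dependency, as in this file):
import tensorflow as tf
try:
  import horovod.tensorflow as hvd
  has_hvd = True
except ImportError:
  has_hvd = False

def make_allreduce(use_hvd):
  # mean-allreduce across workers when distributed, identity otherwise
  return (lambda x: hvd.allreduce(x)) if use_hvd else (lambda x: x)
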
class ReplayActor(object):

  def __init__(self, learner_addr, replay_dir, replay_converter_type,
               policy=None, policy_config=None, model_pool_addrs=None,
               n_v=1, log_interval=50, step_mul=8, SC2_bin_root='/root/',
               game_version='3.16.1', unroll_length=32,
               update_model_freq=32, converter_config=None, agent_cls=None,
               infserver_addr=None, compress=True, da_rate=-1.,
               unk_mmr_dft_to=4000):
    self._data_pool_apis = ImLearnerAPIs(learner_addr)
    self._SC2_bin_root = SC2_bin_root
    self._log_interval = log_interval
    self._replay_dir = replay_dir
    self._step_mul = step_mul
    self._game_version = game_version
    self._unroll_length = unroll_length
    self._data_queue = Queue(unroll_length)
    self._push_thread = Thread(target=self._push_data,
                               args=(self._data_queue,))
    self._push_thread.daemon = True
    self._push_thread.start()
    self.converter_config = ({} if converter_config is None
                             else converter_config)
    self.converter_config['game_version'] = game_version
    self.replay_converter_type = replay_converter_type
    self._replay_converter = replay_converter_type(**self.converter_config)
    self._use_policy = policy is not None
    self._update_model_freq = update_model_freq
    self.model_key = 'IL-model'
    self._da_rate = da_rate
    self._unk_mmr_dft_to = unk_mmr_dft_to
    self._system = platform.system()
    ob_space, ac_space = self._replay_converter.space
    if self._use_policy:
      self.model = None
      policy_config = {} if policy_config is None else policy_config
      agent_cls = agent_cls or PPOAgent
      policy_config['batch_size'] = 1
      policy_config['rollout_len'] = 1
      policy_config['use_loss_type'] = 'none'
      self.infserver_addr = infserver_addr
      if infserver_addr is None:
        self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
        self.agent = agent_cls(policy, ob_space, ac_space, n_v=n_v,
                               scope_name='self',
                               policy_config=policy_config)
      else:
        nc = policy.net_config_cls(ob_space, ac_space, **policy_config)
        ds = InfData(ob_space, ac_space,
                     policy_config['use_self_fed_heads'],
                     nc.use_lstm, nc.hs_len)
        self.agent = PGAgentGPU(infserver_addr, ds, nc.hs_len, compress)
    self.ds = ILData(ob_space, ac_space, self._use_policy,
                     1)  # hs_len does not matter

  def run(self):
    self.replay_task = self._data_pool_apis.request_replay_task()
    while self.replay_task != "":
      game_version = self.replay_task.game_version or self._game_version
      self._adapt_system(game_version)
      if game_version != self._game_version:
        # need to re-init the replay converter
        self._game_version = game_version
        self.converter_config['game_version'] = game_version
        self._replay_converter = self.replay_converter_type(
            **self.converter_config)
      game_core_config = (
          {} if 'game_core_config' not in self.converter_config
          else self.converter_config['game_core_config'])
      extractor = ReplayExtractor(
          replay_dir=self._replay_dir,
          replay_filename=self.replay_task.replay_name,
          player_id=self.replay_task.player_id,
          replay_converter=self._replay_converter,
          step_mul=self._step_mul,
          version=game_version,
          game_core_config=game_core_config,
          da_rate=self._da_rate,
          unk_mmr_dft_to=self._unk_mmr_dft_to)
      self._steps = 0
      first_frame = True
      if self._use_policy:
        self.agent.reset()
        self._update_agent_model()
      for frame in extractor.extract():
        if self._use_policy:
          data = (*frame[0], self.agent.state,
                  np.array(first_frame, np.bool))
          self.agent.update_state(frame[0][0])
          first_frame = False
        else:
          data = frame[0]
        data = self.ds.flatten(self.ds.structure(data))
        if self._data_queue.full():
          logger.log("Actor's queue is full.", level=logger.WARN)
        self._data_queue.put((TensorZipper.compress(data), frame[1]))
        logger.log('successfully put one tuple.', level=logger.DEBUG)
        self._steps += 1
        if self._steps % self._log_interval == 0:
          logger.log("%d frames of replay task [%s] sent to learner." %
                     (self._steps, self.replay_task))
        if self._use_policy and self._steps % self._update_model_freq == 0:
          self._update_agent_model()
      logger.log("Replay task [%s] done. %d frames sent to learner." %
                 (self.replay_task, self._steps))
      self.replay_task = self._data_pool_apis.request_replay_task()
    logger.log("All tasks done.")

  def _adapt_system(self, game_version):
    # TODO(pengsun): any stuff for Darwin, Windows?
    if self._system == 'Linux':
      # set SC2PATH for the sc2 binary. See the deepmind/pysc2 doc.
      if game_version != '4.7.1' or 'SC2PATH' in os.environ:
        os.environ['SC2PATH'] = os.path.join(self._SC2_bin_root,
                                             game_version)
    return

  def _update_agent_model(self):
    if self.infserver_addr is not None:
      return
    logger.log('entering _update_agent_model',
               'steps: {}'.format(self._steps),
               level=logger.DEBUG + 5)
    if self._should_update_model(self.model, self.model_key):
      model = self._model_pool_apis.pull_model(self.model_key)
      self.agent.load_model(model.model)
      self.model = model

  def _should_update_model(self, model, model_key):
    if model is None:
      return True
    else:
      return self._model_pool_apis.pull_attr(
          'updatetime', model_key) > model.updatetime

  def _push_data(self, data_queue):
    """Push trajectories for the learning agent (id 0). Invoked in a thread."""
    while data_queue.empty():
      time.sleep(5)
    logger.log('entering _push_data_to_learner',
               'steps: {}'.format(self._steps),
               level=logger.DEBUG + 5)
    while True:
      task = self.replay_task
      frames = []
      weights = []
      for _ in range(self._unroll_length):
        frame, weight = data_queue.get()
        frames.append(frame)
        weights.append(weight)
      self._data_pool_apis.push_data((task, frames, weights))

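# A hypothetical wiring of ReplayActor (not from the source): pull replay
# tasks from an imitation learner and stream converted frames back. The
# converter class, paths, and address below are placeholders.
actor = ReplayActor(
    learner_addr='localhost:10001:10002',
    replay_dir='/path/to/replays',
    replay_converter_type=MyReplayConverter,  # assumed converter class
    step_mul=8,
    game_version='4.7.1',
    SC2_bin_root='/root/',
)
actor.run()  # loops until the learner returns an empty replay task
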
def test_push_model(self):
  client = ModelPoolAPIs(model_pool_addrs=[
      "localhost:11001:11006", "localhost:11002:11007"
  ])
  key1 = str(uuid.uuid1())
  client.push_model(None, None, key1)

def test_checkpoint(self):
  league_client = LeagueMgrAPIs(league_mgr_addr="localhost:11007")
  model_client1 = ModelPoolAPIs(model_pool_addrs=["localhost:11001:11006"])
  hyperparam = MutableHyperparam()
  model_key1 = str(uuid.uuid1())
  model_key2 = str(uuid.uuid1())
  model_client1.push_model("model_data1", hyperparam, model_key1)
  model_client1.push_model("model_data2", hyperparam, model_key2)
  time.sleep(4)
  league_client.request_add_model(
      Model("model_data1", hyperparam, model_key1))
  model_client1.push_model("model_data3", hyperparam, model_key2)
  time.sleep(3)
  checkpoints = [filename for filename in os.listdir("./checkpoints")
                 if filename.startswith("checkpoint")]
  self.assertTrue(len(checkpoints) > 0)
  checkpoint_dir = os.path.join("./checkpoints", checkpoints[-1])
  league_process = Process(
      target=lambda: LeagueMgr(
          port="11008",
          model_pool_addrs=["localhost:11011:11016"],
          mutable_hyperparam_type='MutableHyperparam',
          restore_checkpoint_dir=checkpoint_dir).run())
  league_process.start()
  model_client2 = ModelPoolAPIs(model_pool_addrs=["localhost:11011:11016"])
  time.sleep(2)
  keys = model_client2.pull_keys()
  self.assertTrue(model_key1 in keys)
  self.assertTrue(model_key2 in keys)
  model1 = model_client1.pull_model(model_key1)
  model2 = model_client2.pull_model(model_key1)
  self.assertEqual(model1.model, model2.model)
  self.assertEqual(model1.key, model2.key)
  self.assertEqual(model1.createtime, model2.createtime)
  model1 = model_client1.pull_model(model_key2)
  model2 = model_client2.pull_model(model_key2)
  self.assertEqual(model1.model, model2.model)
  self.assertEqual(model1.key, model2.key)
  self.assertEqual(model1.createtime, model2.createtime)
  league_process.terminate()

class BaseLearner(object):
  """Base learner class. Defines the basic workflow for a learner."""

  def __init__(self, league_mgr_addr, model_pool_addrs, learner_ports,
               learner_id=''):
    if learner_id:
      self._learner_id = learner_id
    else:
      self._learner_id = str(uuid.uuid1())
    self._zmq_context = zmq.Context()
    self._rep_socket = self._zmq_context.socket(zmq.REP)
    self._rep_socket.bind("tcp://*:%s" % learner_ports[0])
    self._pull_socket = self._zmq_context.socket(zmq.PULL)
    self._pull_socket.setsockopt(zmq.RCVHWM, 1)
    self._pull_socket.bind("tcp://*:%s" % learner_ports[1])
    self._message_thread = Thread(target=self._message_worker)
    self._message_thread.daemon = True
    self._message_thread.start()
    self._league_mgr_apis = LeagueMgrAPIs(league_mgr_addr)
    self._model_pool_apis = ModelPoolAPIs(model_pool_addrs)
    self.task = None
    self.model_key = None
    self.last_model_key = None
    self._lrn_period_count = 0  # learning period count
    self._pull_lock = Lock()

  def run(self):
    while True:
      self.task = self._request_task()
      self._init_task()
      self._train()
      self._finish_task()
      self._lrn_period_count += 1

  @abstractmethod
  def _train(self, **kwargs):
    pass

  @abstractmethod
  def _init_task(self):
    pass

  def _request_task(self):
    task = self._league_mgr_apis.request_learner_task(self._learner_id)
    self.last_model_key = self.model_key
    self.model_key = task.model_key
    # Lazily freeze the model of the last learning period so that actors
    # will stop working for it.
    if self.last_model_key and self.model_key != self.last_model_key:
      self._model_pool_apis.freeze_model(self.last_model_key)
    return task

  def _query_task(self):
    task = self._league_mgr_apis.query_learner_task(self._learner_id)
    if task is not None:
      self.last_model_key = self.model_key
      self.model_key = task.model_key
    return task

  def _finish_task(self):
    self._notify_task_end()

  def _pull_data(self):
    self._pull_lock.acquire()
    data = self._pull_socket.recv(copy=False)
    self._pull_lock.release()
    return pickle.loads(data)

  def _message_worker(self):
    while True:
      msg = self._rep_socket.recv_string()
      if msg == 'learner_id':
        self._rep_socket.send_pyobj(self._learner_id)
      else:
        raise RuntimeError("message not recognized")

  def _notify_task_begin(self, task):
    self._league_mgr_apis.notify_learner_task_begin(self._learner_id, task)

  def _notify_task_end(self):
    self._league_mgr_apis.notify_learner_task_end(self._learner_id)

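# A minimal subclass sketch (illustrative only, not from the source):
# BaseLearner leaves `_init_task` and `_train` abstract, so a concrete
# learner only needs to fill in those two hooks.
class DummyLearner(BaseLearner):

  def _init_task(self):
    # announce the task, then pull the model assigned for this period
    self._notify_task_begin(self.task)
    self.model = self._model_pool_apis.pull_model(self.model_key)

  def _train(self, **kwargs):
    # consume one trajectory pushed by actors via the PULL socket;
    # a real learner would run SGD here and push the updated model
    data = self._pull_data()
    del data  # placeholder: no actual training in this sketch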