def background_learning(self):
    while not self.buffer.good_to_learn:
        time.sleep(1)
    pwc('Start Learning...', 'blue')
    t = 0
    while True:
        t += 1
        self.learn(t)

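# Usage sketch (an assumption, not from the source): background_learning blocks
# until the buffer is ready and then loops forever, so it is typically launched
# on a daemon thread; `learner` stands for a hypothetical agent instance.
import threading

threading.Thread(target=learner.background_learning, daemon=True).start()
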
def save(ckpt_manager, print_terminal_info=True):
    """Saves the model.

    Args:
        ckpt_manager: An instance of tf.train.CheckpointManager
        print_terminal_info: If True, print the save path to the terminal
    """
    path = ckpt_manager.save()
    if print_terminal_info:
        pwc(f'Model saved at {path}', color='cyan')

def save(self, step=None, message=''):
    if hasattr(self, 'saver'):
        path = self.saver.save(self.sess, self.model_file, global_step=step)
        if message:
            message = f'\n{message}'
        pwc(f'Model saved at {path}{message}', 'magenta')
    else:
        # no intention to treat a missing saver as an error; just print a warning message
        pwc('No saver is available', 'magenta')

def main(env_config, model_config, agent_config, replay_config,
         n, record=False, size=(128, 128), video_len=1000,
         fps=30, save=False):
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config.get('precision', 32))

    use_ray = env_config.get('n_workers', 0) > 1
    if use_ray:
        import ray
        ray.init()
        sigint_shutdown_ray()

    algo_name = agent_config['algorithm']
    env_name = env_config['name']

    try:
        make_env = pkg.import_module('env', algo_name, place=-1).make_env
    except Exception:
        make_env = None
    env_config.pop('reward_clip', False)
    env = create_env(env_config, env_fn=make_env)

    create_model, Agent = pkg.import_agent(config=agent_config)
    models = create_model(model_config, env)
    agent = Agent(config=agent_config, models=models, dataset=None, env=env)

    if n < env.n_envs:
        n = env.n_envs
    scores, epslens, video = evaluate(
        env, agent, n, record=record, size=size, video_len=video_len)
    pwc(f'After running {n} episodes',
        f'Score: {np.mean(scores):.3g}\tEpslen: {np.mean(epslens):.3g}',
        color='cyan')

    if record:
        save_video(f'{algo_name}-{env_name}', video, fps=fps)
    if use_ray:
        ray.shutdown()

def __exit__(self, exc_type, exc_value, traceback):
    if self._to_log:
        duration = time() - self._start
        aggregator = self.aggregators[self._summary_name]
        aggregator.add(duration)
        if aggregator.count >= self._period:
            duration = aggregator.average()
            step = tf.summary.experimental.get_step()
            tf.summary.scalar(f'timer/{self._summary_name}', duration, step=step)
            aggregator.reset()
            if self._print_terminal_info:
                pwc(f'{self._summary_name} duration: "{duration}" '
                    f'averaged over {self._period} times', color='blue')

def timeit(func, *args, name=None, to_print=False, **kwargs):
    start_time = gmtime()
    start = time()
    result = func(*args, **kwargs)
    end = time()
    end_time = gmtime()
    if to_print:
        pwc(f'{name if name else func.__name__}: '
            f'Start "{strftime("%d %b %H:%M:%S", start_time)}"',
            f'End "{strftime("%d %b %H:%M:%S", end_time)}" '
            f'Duration "{end - start:.3g}s"',
            color='blue')
    return end - start, result

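# Usage sketch: timing an arbitrary callable with timeit, which returns both
# the elapsed seconds and the wrapped function's result.
import numpy as np

x = np.random.rand(1000, 1000)
duration, result = timeit(np.dot, x, x, name='matmul', to_print=True)
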
def __exit__(self, exc_type, exc_value, traceback):
    if self._to_log:
        duration = time() - self._start
        aggregator = self.aggregators[self._summary_name]
        aggregator.add(duration)
        if self._period is not None and aggregator.count >= self._period:
            if self._mode == 'average':
                duration = aggregator.average()
                duration = (f'{duration*1000:.3g}ms' if duration < 1e-1
                            else f'{duration:.3g}s')
                pwc(f'{self._summary_name} duration: "{duration}" '
                    f'averaged over {self._period} times', color='blue')
                aggregator.reset()
            else:
                duration = aggregator.sum
                pwc(f'{self._summary_name} duration: "{duration}" '
                    f'for {aggregator.count} times', color='blue')

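# Usage sketch (an assumption: only __exit__ is shown, so the class name and
# constructor signature below are guesses) for the timing context manager.
with Timer('env_step', period=100):
    env.step(action)  # `env` and `action` are hypothetical placeholders
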
def demonstrate(self):
    state = self.env_vec.reset()
    state = np.reshape(state, (-1, *self.env_vec.state_shape))
    if self.use_lstm:
        self.last_lstm_state = self.sess.run(
            self.ac.initial_state,
            feed_dict={self.env_phs['state']: state})
    for _ in range(self.env_vec.max_episode_steps):
        action, _, _ = self.act(state)
        state, _, done, _ = self.env_vec.step(action)
        if done:
            break
    pwc(f'Demonstration score:\t{self.env_vec.get_score()}', 'green')
    pwc(f'Demonstration length:\t{self.env_vec.get_epslen()}', 'green')

def restore(ckpt_manager, ckpt, ckpt_path, name='model'):
    """Restores the latest checkpoint recorded by ckpt_manager.

    Args:
        ckpt_manager: An instance of tf.train.CheckpointManager
        ckpt: An instance of tf.train.Checkpoint
        ckpt_path: The directory in which to write checkpoints
        name: Optional name for print
    """
    path = ckpt_manager.latest_checkpoint
    if path:
        ckpt.restore(path)  # .assert_consumed()
        pwc(f'Params for {name} are restored from "{path}".', color='cyan')
    else:
        pwc(f'No model for {name} is found at "{ckpt_path}"!',
            'Start training from scratch.', color='cyan')
    return bool(path)

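# Usage sketch pairing the save/restore helpers above with TensorFlow's
# checkpoint API; the Keras model and directory below are illustrative only.
import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(4)])
ckpt_path = 'checkpoints/model'
ckpt = tf.train.Checkpoint(model=model)
ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_path, max_to_keep=5)
restore(ckpt_manager, ckpt, ckpt_path, name='model')  # no-op on the first run
# ... train for a while ...
save(ckpt_manager)
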
def _act_loop(self, workers, learner):
    pwc('Action loop starts', color='cyan')
    objs = {workers[wid].reset_env.remote(eid): (wid, eid)
            for wid in range(self._n_workers)
            for eid in range(self._envs_per_worker)}

    while True:
        ready_objs, _ = ray.wait(list(objs), num_returns=self._action_batch)
        worker_ids, env_ids = zip(*[objs[i] for i in ready_objs])
        for oid in ready_objs:
            del objs[oid]
        obs, reward, discount, already_done = zip(*ray.get(ready_objs))

        # track ready info
        wids, eids, os, rs, ads = [], [], [], [], []
        for wid, eid, o, r, d, ad in zip(
                worker_ids, env_ids, obs, reward, discount, already_done):
            if ad:
                objs[workers[wid].reset_env.remote(eid)] = (wid, eid)
                self.finish_episode(learner, wid, eid, o, r, d)
                self.reset_states(wid, eid)
            else:
                self.store_transition(wid, eid, o, r, d)
                wids.append(wid)
                eids.append(eid)
                os.append(o)
                rs.append(r)
                ads.append(ad)

        if os:
            if self._store_state:
                actions, states = self(wids, eids, os)
                names = states._fields
                for wid, eid, a, s in zip(wids, eids, actions, zip(*states)):
                    self._cache[(wid, eid)].append(
                        dict(action=a, **{n: ss for n, ss in zip(names, s)}))
            else:
                actions = self(wids, eids, os)
                for wid, eid, a in zip(wids, eids, actions):
                    self._cache[(wid, eid)].append(dict(action=a))
            objs.update({workers[wid].env_step.remote(eid, a): (wid, eid)
                         for wid, eid, a in zip(wids, eids, actions)})

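# A minimal, self-contained sketch (not from the source) of the ray.wait
# polling pattern _act_loop relies on: keep a dict of in-flight object refs,
# harvest whichever finish first, and immediately issue follow-up work.
import ray

@ray.remote
def env_step(eid):
    return eid  # stand-in for (obs, reward, discount, done)

ray.init()
objs = {env_step.remote(eid): eid for eid in range(8)}
for _ in range(4):
    ready, _ = ray.wait(list(objs), num_returns=2)
    for ref in ready:
        eid = objs.pop(ref)
        objs[env_step.remote(eid)] = eid  # issue the next step for this env
ray.shutdown()
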
def restore(self, model_file=None):
    """To restore a specific version of the model, set model_file to
    the model stored in saved_models."""
    if model_file:
        self.model_file = model_file
    if not hasattr(self, 'saver'):
        self.saver = self._setup_saver()
    try:
        ckpt = tf.train.latest_checkpoint(self.model_file)
        self.saver.restore(self.sess, ckpt)
    except Exception:
        pwc(f'Model {self.model_name}: no saved model for "{self.name}" '
            f'is found at "{self.model_file}"!', 'magenta')
        import sys
        sys.exit()
    else:
        pwc(f'Model {self.model_name}: Params for {self.name} are restored '
            f'from "{self.model_file}".', 'magenta')

def load_and_run(directory):
    # locate the config file under the given directory
    config_file = None
    for root, _, files in os.walk(directory):
        for f in files:
            if 'src' in root:
                break
            if f == 'config.yaml' and config_file is None:
                config_file = os.path.join(root, f)
                break
            elif f == 'config.yaml' and config_file is not None:
                pwc(f'Found multiple "config.yaml" files: "{config_file}" '
                    f'and "{os.path.join(root, f)}"')
                sys.exit()

    config = load_config(config_file)
    configs = decompose_config(config)
    main = pkg.import_main('train', config=configs.agent)
    main(*configs)

def change_config(kw, model_name, env_config, model_config,
                  agent_config, replay_config):
    """Changes configs based on kw. model_name is modified
    accordingly to embody the changes."""
    if kw:
        for s in kw:
            key, value = s.split('=')
            value = eval_str(value)
            if model_name != '':
                model_name += '-'
            model_name += s

            # change kwargs in configs
            configs = []
            config_keys = ['env', 'model', 'agent', 'replay']
            config_values = [env_config, model_config, agent_config, replay_config]
            for k, v in model_config.items():
                if isinstance(v, dict):
                    config_keys.append(k)
                    config_values.append(v)
            for name, config in zip(config_keys, config_values):
                if key in config:
                    configs.append((name, config))
            assert configs, f'"{s}" does not appear in any config!'
            if len(configs) > 1:
                pwc(f'All {key} appearing in the following configs will be changed: '
                    f'{[n for n, _ in configs]}.', color='cyan')
            for _, c in configs:
                c[key] = value

    return model_name

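# Usage sketch: each kw entry has the form 'key=value', and eval_str presumably
# converts the value string to a Python literal. The config dicts here are
# hypothetical; any config containing the key is updated in place.
env_config = {'name': 'BreakoutNoFrameskip-v4'}
model_config = {'units': 512}
agent_config = {'lr': 1e-4}
replay_config = {'capacity': int(1e6)}
model_name = change_config(['lr=3e-4'], 'baseline',
                           env_config, model_config, agent_config, replay_config)
# agent_config['lr'] is now 3e-4 and model_name == 'baseline-lr=3e-4'
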
def display_var_info(vars, name='trainable'):
    pwc(f'Print {name} variables', 'yellow')
    count_params = 0
    for v in vars:
        v_name = v.name
        if '/Adam' in v_name or 'beta1_power' in v_name or 'beta2_power' in v_name:
            continue    # skip optimizer state variables
        v_params = int(np.prod(v.shape.as_list()))
        count_params += v_params
        if '/b:' in v_name or '/biases' in v_name:
            continue    # Wx+b, bias is not interesting to look at => count params, but don't print
        pwc(f'   {v_name}{" "*(100-len(v_name))} {v_params:d} params {v.shape}', 'yellow')
    pwc(f'Total model parameters: {count_params*1e-6:0.2f} million', 'yellow')

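# Usage sketch for the graph-mode variable summary above; assumes a TF1-style
# setup where trainable variables are registered with the default graph.
import tensorflow as tf

display_var_info(tf.compat.v1.trainable_variables(), name='trainable')
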
def __init__(self, log_dir=None, log_file='log.txt'):
    """Initializes a Logger.

    Args:
        log_dir (string): A directory for saving results to. If
            `None/False`, Logger only serves as a storage but doesn't
            write anything to the disk.
        log_file (string): Name for the tab-separated-value file
            containing metrics logged throughout a training run.
            Defaults to "log.txt".
    """
    log_file = log_file if log_file.endswith('log.txt') \
        else log_file + '/log.txt'
    self._log_dir = log_dir
    if self._log_dir:
        path = os.path.join(self._log_dir, log_file)
        if os.path.exists(path) and os.stat(path).st_size != 0:
            i = 1
            name, suffix = path.rsplit('.', 1)
            while os.path.exists(name + f'{i}.' + suffix):
                i += 1
            pwc(f'Warning: Log file "{path}" already exists!',
                f'Data will be logged to "{name + f"{i}." + suffix}" instead.',
                color='magenta')
            path = name + f'{i}.' + suffix
        if not os.path.isdir(self._log_dir):
            os.makedirs(self._log_dir)
        self._out_file = open(path, 'w')
        atexit.register(self._out_file.close)
        pwc(f'Logging data to "{self._out_file.name}"', color='green')
    else:
        self._out_file = None
        pwc('Log directory is not specified; '
            'no data will be logged to the disk', color='magenta')

    self._first_row = True
    self._log_headers = []
    self._log_current_row = {}
    self._store_dict = defaultdict(list)

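# Usage sketch: with a log_dir the Logger writes a tab-separated log file,
# de-duplicating the file name if one already exists; without it, the Logger
# only accumulates metrics in memory.
logger = Logger(log_dir='logs/run1')   # writes to logs/run1/log.txt
memory_only = Logger(log_dir=None)     # stores metrics, writes nothing
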
if __name__ == '__main__':
    args = parse_eval_args()

    # search for config.yaml
    directory = args.directory
    config_file = None
    for root, _, files in os.walk(directory):
        for f in files:
            if 'src' in root:
                break
            elif f.endswith('config.yaml') and config_file is None:
                config_file = os.path.join(root, f)
                break
            elif f.endswith('config.yaml') and config_file is not None:
                pwc(f'Found multiple "config.yaml" files: "{config_file}" '
                    f'and "{os.path.join(root, f)}"')
                sys.exit()

    # load the respective config
    config = load_config(config_file)
    env_config = config['env']
    model_config = config['model']
    agent_config = config['agent']
    replay_config = config.get('buffer') or config.get('replay')
    agent_config['logger'] = False

    # get the main function
    try:
        main = pkg.import_main('eval', config=agent_config)
    except Exception:
        print('Default main is used for evaluation')

def print_construction_complete(self):
    pwc(f'{self.name} has been constructed', 'cyan')

def print_construction_complete(self):
    pwc(f'{self.name.upper()} is constructed...', color='cyan')

def print_construction_complete(self):
    pwc(f'Worker {self.no} has been constructed.', 'cyan')

def print_construction_complete(self):
    pwc('Learner has been constructed.', 'cyan')

def set_weights(self, weights):
    pwc('Learner: pull weights from the evaluator', 'blue')
    self.variables.set_flat(weights)

def sample_data(self, learner, evaluator):
    def collect_fn(state, action, reward, done):
        self.buffer.add_data(state, action, reward, done)

    def pull_weights_from_learner():
        # pull weights from the learner
        weights = ray.get(learner.get_weights.remote())
        self.variables.set_flat(weights)

    to_record = self.no == 0
    scores = deque(maxlen=self.weight_update_freq)
    epslens = deque(maxlen=self.weight_update_freq)
    best_score_mean = -50
    episode_i = 0
    step = 0
    while True:
        episode_i += 1
        fn = None if to_record else collect_fn
        score, epslen = self.run_trajectory(fn=fn, evaluation=to_record)
        step += epslen
        scores.append(score)
        epslens.append(epslen)

        if episode_i % self.weight_update_freq == 0:
            score_mean = np.mean(scores)
            if to_record:
                # record stats
                stats = dict(
                    Timing='Eval',
                    WorkerNo=self.no,
                    Steps=episode_i,
                    ScoreMean=score_mean,
                    ScoreStd=np.std(scores),
                    ScoreMax=np.max(scores),
                    EpslenMean=np.mean(epslens),
                    EpslenStd=np.std(epslens),
                )
                tf_stats = dict(worker_no=f'worker_{self.no}')
                tf_stats.update(stats)
                learner.record_stats.remote(tf_stats)
                learner.rl_log.remote(stats)

            if score_mean > min(250, best_score_mean):
                best_score_mean = score_mean
                pwc(f'Worker {self.no}: Best score updated to {best_score_mean:.2f}',
                    'blue')
                evaluator.evaluate_model.remote(
                    self.variables.get_flat(), score_mean)

            # send data to the learner
            if self.buffer.idx == self.buffer.capacity:
                last_state = np.zeros_like(self.buffer['state'][0])
                self.buffer.add_last_state(last_state)
            self.buffer['priority'][:self.buffer.idx] = self.compute_priorities()
            # push samples to the central buffer after each episode
            learner.merge_buffer.remote(dict(self.buffer), self.buffer.idx)
            self.buffer.reset()

            # pull weights from the learner
            pull_weights_from_learner()

def main(env_config, model_config, agent_config, replay_config,
         n, record=False, size=(128, 128), video_len=1000,
         fps=30, save=False):
    logging.basicConfig(level=logging.DEBUG)
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config.get('precision', 32))

    use_ray = env_config.get('n_workers', 0) > 1
    if use_ray:
        import ray
        ray.init()
        sigint_shutdown_ray()

    algo_name = agent_config['algorithm']
    env_name = env_config['name']

    if record:
        env_config['log_episode'] = True
        env_config['n_workers'] = env_config['n_envs'] = 1

    env = create_env(env_config)
    create_model, Agent = pkg.import_agent(config=agent_config)
    models = create_model(model_config, env)
    agent = Agent(config=agent_config, models=models, dataset=None, env=env)

    if save:
        n_workers = env_config.get('n_workers', 1)
        n_envs = env_config.get('n_envs', 1)
        replay_config['n_envs'] = n_workers * n_envs
        replay_config['replay_type'] = 'uniform'
        replay_config['dir'] = f'data/{agent.name.lower()}-{env.name.lower()}'
        replay_config['n_steps'] = 1
        replay_config['save'] = True
        replay_config['save_temp'] = True
        replay_config['capacity'] = int(1e6)
        replay_config['has_next_obs'] = True
        replay = create_replay(replay_config)

        def collect(obs, action, reward, discount, next_obs, logpi, **kwargs):
            replay.add(obs=obs, action=action, reward=reward,
                       discount=discount, next_obs=next_obs, logpi=logpi)
    else:
        def collect(**kwargs):
            pass

    if n < env.n_envs:
        n = env.n_envs
    scores, epslens, video = evaluate(
        env, agent, n, record=record, size=size,
        video_len=video_len, step_fn=collect)
    pwc(f'After running {n} episodes',
        f'Score: {np.mean(scores):.3g}\tEpslen: {np.mean(epslens):.3g}',
        color='cyan')

    if save:
        replay.save()
    if record:
        save_video(f'{algo_name}-{env_name}', video, fps=fps)
    if use_ray:
        ray.shutdown()