Example No. 1
 def background_learning(self):
     while not self.buffer.good_to_learn:
         time.sleep(1)
     pwc('Start Learning...', 'blue')
     
     t = 0
     while True:
         t += 1
         self.learn(t)
Example No. 2
def save(ckpt_manager, print_terminal_info=True):
    """ Saves model

    Args:
        ckpt_manager: An instance of tf.train.CheckpointManager
        message: optional message for print
    """
    path = ckpt_manager.save()
    if print_terminal_info:
        pwc(f'Model saved at {path}', color='cyan')
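
A minimal usage sketch for save, assuming pwc is the project's colored-print helper; the toy model, optimizer, and checkpoint directory below are hypothetical:

import tensorflow as tf

# Hypothetical objects tracked by the checkpoint.
model = tf.keras.Sequential([tf.keras.layers.Dense(8)])
opt = tf.keras.optimizers.Adam(1e-3)
ckpt = tf.train.Checkpoint(model=model, opt=opt)
# The manager rotates checkpoint files under ./checkpoints.
ckpt_manager = tf.train.CheckpointManager(ckpt, './checkpoints', max_to_keep=5)

save(ckpt_manager)  # prints something like 'Model saved at ./checkpoints/ckpt-1' in cyan
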
Example No. 3
 def save(self, step=None, message=''):
     if hasattr(self, 'saver'):
         path = self.saver.save(self.sess,
                                self.model_file,
                                global_step=step)
         if message:
             message = f'\n{message}'
         pwc(f'Model saved at {path}{message}', 'magenta')
     else:
         # no intention to treat a missing saver as an error, just print a warning message
         pwc('No saver is available', 'magenta')
Example No. 4
def main(env_config,
         model_config,
         agent_config,
         replay_config,
         n,
         record=False,
         size=(128, 128),
         video_len=1000,
         fps=30,
         save=False):
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config.get('precision', 32))

    use_ray = env_config.get('n_workers', 0) > 1
    if use_ray:
        import ray
        ray.init()
        sigint_shutdown_ray()

    algo_name = agent_config['algorithm']
    env_name = env_config['name']

    try:
        make_env = pkg.import_module('env', algo_name, place=-1).make_env
    except Exception:
        make_env = None
    env_config.pop('reward_clip', False)
    env = create_env(env_config, env_fn=make_env)
    create_model, Agent = pkg.import_agent(config=agent_config)
    models = create_model(model_config, env)

    agent = Agent(config=agent_config, models=models, dataset=None, env=env)

    if n < env.n_envs:
        n = env.n_envs
    scores, epslens, video = evaluate(env,
                                      agent,
                                      n,
                                      record=record,
                                      size=size,
                                      video_len=video_len)
    pwc(f'After running {n} episodes',
        f'Score: {np.mean(scores):.3g}\tEpslen: {np.mean(epslens):.3g}',
        color='cyan')

    if record:
        save_video(f'{algo_name}-{env_name}', video, fps=fps)
    if use_ray:
        ray.shutdown()
Example No. 5
 def __exit__(self, exc_type, exc_value, traceback):
     if self._to_log:
         duration = time() - self._start
         aggregator = self.aggregators[self._summary_name]
         aggregator.add(duration)
         if aggregator.count >= self._period:
             duration = aggregator.average()
             step = tf.summary.experimental.get_step()
             tf.summary.scalar(f'timer/{self._summary_name}',
                               duration,
                               step=step)
             aggregator.reset()
             if self._print_terminal_info:
                 pwc(f'{self._summary_name} duration: "{duration}" averaged over {self._period} times',
                     color='blue')
Example No. 6
def timeit(func, *args, name=None, to_print=False, **kwargs):
    start_time = gmtime()
    start = time()
    result = func(*args, **kwargs)
    end = time()
    end_time = gmtime()

    if to_print:
        pwc(
            f'{name if name else func.__name__}: '
            f'Start "{strftime("%d %b %H:%M:%S", start_time)}"',
            f'End "{strftime("%d %b %H:%M:%S", end_time)}" '
            f'Duration "{end - start:.3g}s"',
            color='blue')

    return end - start, result
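
A quick usage sketch for timeit; slow_step is a made-up stand-in for whatever function you want to time:

from time import sleep

def slow_step(n):
    sleep(0.2)  # simulate some work
    return n * n

# timeit returns the wall-clock duration together with the wrapped function's result.
duration, result = timeit(slow_step, 7, name='slow_step', to_print=True)
assert result == 49
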
Example No. 7
 def __exit__(self, exc_type, exc_value, traceback):
     if self._to_log:
         duration = time() - self._start
         aggregator = self.aggregators[self._summary_name]
         aggregator.add(duration)
         if self._period is not None and aggregator.count >= self._period:
             if self._mode == 'average':
                 duration = aggregator.average()
                 duration = (f'{duration*1000:.3g}ms'
                             if duration < 1e-1 else f'{duration:.3g}s')
                 pwc(f'{self._summary_name} duration: "{duration}" averaged over {self._period} times',
                     color='blue')
                 aggregator.reset()
             else:
                 duration = aggregator.sum
                 pwc(f'{self._summary_name} duration: "{duration}" for {aggregator.count} times',
                     color='blue')
Example No. 8
    def demonstrate(self):
        state = self.env_vec.reset()
        state = np.reshape(state, (-1, *self.env_vec.state_shape))
        if self.use_lstm:
            self.last_lstm_state = self.sess.run(
                self.ac.initial_state,
                feed_dict={self.env_phs['state']: state})

        for _ in range(self.env_vec.max_episode_steps):
            action, _, _ = self.act(state)
            state, _, done, _ = self.env_vec.step(action)

            if done:
                break

        pwc(f'Demonstration score:\t{self.env_vec.get_score()}', 'green')
        pwc(f'Demonstration length:\t{self.env_vec.get_epslen()}', 'green')
Example No. 9
def restore(ckpt_manager, ckpt, ckpt_path, name='model'):
    """ Restores the latest parameter recorded by ckpt_manager

    Args:
        ckpt_manager: An instance of tf.train.CheckpointManager
        ckpt: An instance of tf.train.Checkpoint
        ckpt_path: The directory in which to write checkpoints
        name: optional name for print
    """
    path = ckpt_manager.latest_checkpoint
    if path:
        ckpt.restore(path)  #.assert_consumed()
        pwc(f'Params for {name} are restored from "{path}".', color='cyan')
    else:
        pwc(f'No model for {name} is found at "{ckpt_path}"!',
            f'Start training from scratch.',
            color='cyan')
    return bool(path)
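
A usage sketch pairing restore with the checkpoint objects it expects; the model, optimizer, and directory below are hypothetical placeholders:

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(8)])
opt = tf.keras.optimizers.Adam(1e-3)
ckpt_dir = './checkpoints'
ckpt = tf.train.Checkpoint(model=model, opt=opt)
ckpt_manager = tf.train.CheckpointManager(ckpt, ckpt_dir, max_to_keep=5)

# True if a checkpoint was found and restored, False if training starts from scratch.
restored = restore(ckpt_manager, ckpt, ckpt_dir, name='model')
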
Example No. 10
        def _act_loop(self, workers, learner):
            pwc('Action loop starts', color='cyan')
            objs = {workers[wid].reset_env.remote(eid): (wid, eid)
                for wid in range(self._n_workers) 
                for eid in range(self._envs_per_worker)}

            while True:
                ready_objs, not_objs = ray.wait(list(objs), num_returns=self._action_batch)
                worker_ids, env_ids = zip(*[objs[i] for i in ready_objs])
                for oid in ready_objs:
                    del objs[oid]
                obs, reward, discount, already_done = zip(*ray.get(ready_objs))
                # track ready info
                wids, eids, os, rs, ads = [], [], [], [], []
                for wid, eid, o, r, d, ad in zip(
                    worker_ids, env_ids, obs, reward, discount, already_done):
                    if ad:
                        objs[workers[wid].reset_env.remote(eid)] = (wid, eid)
                        self.finish_episode(learner, wid, eid, o, r, d)
                        self.reset_states(wid, eid)
                    else:
                        self.store_transition(wid, eid, o, r, d)
                        wids.append(wid)
                        eids.append(eid)
                        os.append(o)
                        rs.append(r)
                        ads.append(ad)

                if os:
                    if self._store_state:
                        actions, states = self(wids, eids, os)
                        names = states._fields
                        [self._cache[(wid, eid)].append(
                            dict(action=a, **{n: ss for n, ss in zip(names, s)}))
                        for wid, eid, a, s in zip(wids, eids, actions, zip(*states))]
                    else:
                        actions = self(wids, eids, os)
                        [self._cache[(wid, eid)].append(dict(action=a))
                            for wid, eid, a in zip(wids, eids, actions)]
                    objs.update({workers[wid].env_step.remote(eid, a): (wid, eid)
                        for wid, eid, a in zip(wids, eids, actions)})
Example No. 11
 def restore(self, model_file=None):
     """
     To restore a specific version of model, set filename to the model stored in saved_models
     """
     if model_file:
         self.model_file = model_file
     if not hasattr(self, 'saver'):
         self.saver = self._setup_saver()
     try:
         ckpt = tf.train.latest_checkpoint(self.model_file)
         self.saver.restore(self.sess, ckpt)
     except Exception:
         pwc(
             f'Model {self.model_name}: no saved model for "{self.name}" is found at "{self.model_file}"!',
             'magenta')
         import sys
         sys.exit()
     else:
         pwc(
             f'Model {self.model_name}: Params for {self.name} are restored from "{self.model_file}".',
             'magenta')
Example No. 12
File: train.py Project: xlnwel/d2rl
def load_and_run(directory):
    # search for config.yaml under the directory
    config_file = None
    for root, _, files in os.walk(directory):
        for f in files:
            if 'src' in root:
                break
            if f == 'config.yaml' and config_file is None:
                config_file = os.path.join(root, f)
                break
            elif f == 'config.yaml' and config_file is not None:
                pwc(f'Found multiple "config.yaml" files: '
                    f'"{config_file}" and "{os.path.join(root, f)}"')
                sys.exit()

    config = load_config(config_file)
    configs = decompose_config(config)

    main = pkg.import_main('train', config=configs.agent)

    main(*configs)
Example No. 13
File: train.py Project: xlnwel/d2rl
def change_config(kw, model_name, env_config, model_config, agent_config,
                  replay_config):
    """ Changes configs based on kw. model_name will
    be modified accordingly to embody changes 
    """
    if kw:
        for s in kw:
            key, value = s.split('=')
            value = eval_str(value)
            if model_name != '':
                model_name += '-'
            model_name += s

            # change kwargs in config
            configs = []
            config_keys = ['env', 'model', 'agent', 'replay']
            config_values = [
                env_config, model_config, agent_config, replay_config
            ]

            for k, v in model_config.items():
                if isinstance(v, dict):
                    config_keys.append(k)
                    config_values.append(v)
            for name, config in zip(config_keys, config_values):
                if key in config:
                    configs.append((name, config))
            assert configs, f'"{s}" does not appear in any config!'
            if len(configs) > 1:
                pwc(f'All occurrences of "{key}" in the following configs will be changed: '
                    f'{[n for n, _ in configs]}.',
                    color='cyan')

            for _, c in configs:
                c[key] = value

    return model_name
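
A sketch of the expected kw format, using toy config dictionaries; eval_str is assumed to parse each value into its literal type:

env_config = {'name': 'BreakoutNoFrameskip-v4'}
model_config = {'units': 64}
agent_config = {'lr': 1e-3, 'gamma': 0.99}
replay_config = {'capacity': 100000}

# Each kw entry is a 'key=value' string; matching keys are overwritten in place
# and the returned model name is suffixed with every change.
name = change_config(['lr=3e-4', 'gamma=0.995'], 'ppo',
                     env_config, model_config, agent_config, replay_config)
# name == 'ppo-lr=3e-4-gamma=0.995'; agent_config['lr'] is now 3e-4
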
Example No. 14
def display_var_info(vars, name='trainable'):
    pwc(f'Print {name} variables', 'yellow')
    count_params = 0
    for v in vars:
        v_name = v.name
        if '/Adam' in v_name or 'beta1_power' in v_name or 'beta2_power' in v_name:
            continue
        v_params = int(np.prod(v.shape.as_list()))
        count_params += v_params
        if '/b:' in v_name or '/biases' in v_name:
            continue  # Wx+b, bias is not interesting to look at => count params, but do not print
        pwc(f'   {v_name}{" "*(100-len(v_name))} {v_params:d} params {v.shape}',
            'yellow')

    pwc(f'Total model parameters: {count_params*1e-6:0.2f} million', 'yellow')
Example No. 15
    def __init__(self, log_dir=None, log_file='log.txt'):
        """
        Initialize a Logger.

        Args:
            log_dir (string): A directory for saving results to. If 
                `None/False`, Logger only serves as a storage but doesn't
                write anything to the disk.

            log_file (string): Name for the tab-separated-value file 
                containing metrics logged throughout a training run. 
                Defaults to "log.txt". 
        """
        log_file = log_file if log_file.endswith('log.txt') \
            else log_file + '/log.txt'
        self._log_dir = log_dir
        if self._log_dir:
            path = os.path.join(self._log_dir, log_file)
            if os.path.exists(path) and os.stat(path).st_size != 0:
                i = 1
                name, suffix = path.rsplit('.', 1)
                while os.path.exists(name + f'{i}.' + suffix):
                    i += 1
                pwc(f'Warning: Log file "{path}" already exists!',
                    f'Data will be logged to "{name + f"{i}." + suffix}" instead.',
                    color='magenta')
                path = name + f"{i}." + suffix
            if not os.path.isdir(self._log_dir):
                os.makedirs(self._log_dir)
            self._out_file = open(path, 'w')
            atexit.register(self._out_file.close)
            pwc(f'Logging data to "{self._out_file.name}"', color='green')
        else:
            self._out_file = None
            pwc(
                f'Log directory is not specified, '
                'no data will be logged to the disk',
                color='magenta')

        self._first_row = True
        self._log_headers = []
        self._log_current_row = {}
        self._store_dict = defaultdict(list)
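
A brief usage sketch for the Logger; the directory is hypothetical, and passing log_dir=None keeps everything in memory:

logger = Logger(log_dir='logs/run1')  # logs to logs/run1/log.txt, or log1.txt if that file already has data
memory_only = Logger(log_dir=None)    # warns that no data will be written to disk
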
Example No. 16
if __name__ == '__main__':
    args = parse_eval_args()

    # search for config.yaml
    directory = args.directory
    config_file = None
    for root, _, files in os.walk(directory):
        for f in files:
            if 'src' in root:
                break
            elif f.endswith('config.yaml') and config_file is None:
                config_file = os.path.join(root, f)
                break
            elif f.endswith('config.yaml') and config_file is not None:
                pwc(f'Found multiple "config.yaml" files: '
                    f'"{config_file}" and "{os.path.join(root, f)}"')
                sys.exit()

    # load respective config
    config = load_config(config_file)
    env_config = config['env']
    model_config = config['model']
    agent_config = config['agent']
    replay_config = config.get('buffer') or config.get('replay')
    agent_config['logger'] = False

    # get the main function
    try:
        main = pkg.import_main('eval', config=agent_config)
    except Exception:
        print('Default main is used for evaluation')
Example No. 17
 def print_construction_complete(self):
     pwc(f'{self.name} has been constructed', 'cyan')
Example No. 18
 def print_construction_complete(self):
     pwc(f'{self.name.upper()} is constructed...', color='cyan')
Example No. 19
 def print_construction_complete(self):
     pwc(f'Worker {self.no} has been constructed.', 'cyan')
Example No. 20
 def print_construction_complete(self):
     pwc('Learner has been constructed.', 'cyan')
Example No. 21
 def set_weights(self, weights):
     pwc('Learner: pull weights from the evaluator', 'blue')
     self.variables.set_flat(weights)
Example No. 22
        def sample_data(self, learner, evaluator):
            def collect_fn(state, action, reward, done):
                self.buffer.add_data(state, action, reward, done)

            def pull_weights_from_learner():
                # pull weights from learner
                weights = ray.get(learner.get_weights.remote())
                self.variables.set_flat(weights)

            to_record = self.no == 0
            scores = deque(maxlen=self.weight_update_freq)
            epslens = deque(maxlen=self.weight_update_freq)
            best_score_mean = -50
            episode_i = 0
            step = 0
            while True:
                episode_i += 1
                fn = None if to_record else collect_fn
                score, epslen = self.run_trajectory(fn=fn,
                                                    evaluation=to_record)
                step += epslen
                scores.append(score)
                epslens.append(epslen)

                if episode_i % self.weight_update_freq == 0:
                    score_mean = np.mean(scores)
                    if to_record:
                        # record stats
                        stats = dict(
                            Timing='Eval',
                            WorkerNo=self.no,
                            Steps=episode_i,
                            ScoreMean=score_mean,
                            ScoreStd=np.std(scores),
                            ScoreMax=np.max(scores),
                            EpslenMean=np.mean(epslens),
                            EpslenStd=np.std(epslens),
                        )
                        tf_stats = dict(worker_no=f'worker_{self.no}')
                        tf_stats.update(stats)

                        learner.record_stats.remote(tf_stats)

                        learner.rl_log.remote(stats)

                    if score_mean > min(250, best_score_mean):
                        best_score_mean = score_mean
                        pwc(
                            f'Worker {self.no}: Best score updated to {best_score_mean:.2f}',
                            'blue')
                        evaluator.evaluate_model.remote(
                            self.variables.get_flat(), score_mean)

                    # send data to learner
                    if self.buffer.idx == self.buffer.capacity:
                        last_state = np.zeros_like(self.buffer['state'][0])
                        self.buffer.add_last_state(last_state)
                        self.buffer['priority'][:self.buffer.idx] = self.compute_priorities()
                        # push samples to the central buffer after each episode
                        learner.merge_buffer.remote(dict(self.buffer),
                                                    self.buffer.idx)
                        self.buffer.reset()

                    # pull weights from learner
                    weights = ray.get(learner.get_weights.remote())
                    self.variables.set_flat(weights)
Example No. 23
def main(env_config,
         model_config,
         agent_config,
         replay_config,
         n,
         record=False,
         size=(128, 128),
         video_len=1000,
         fps=30,
         save=False):
    logging.basicConfig(level=logging.DEBUG)
    silence_tf_logs()
    configure_gpu()
    configure_precision(agent_config.get('precision', 32))

    use_ray = env_config.get('n_workers', 0) > 1
    if use_ray:
        import ray
        ray.init()
        sigint_shutdown_ray()

    algo_name = agent_config['algorithm']
    env_name = env_config['name']

    if record:
        env_config['log_episode'] = True
        env_config['n_workers'] = env_config['n_envs'] = 1

    env = create_env(env_config)

    create_model, Agent = pkg.import_agent(config=agent_config)

    models = create_model(model_config, env)

    agent = Agent(config=agent_config, models=models, dataset=None, env=env)

    if save:
        n_workers = env_config.get('n_workers', 1)
        n_envs = env_config.get('n_envs', 1)
        replay_config['n_envs'] = n_workers * n_envs
        replay_config['replay_type'] = 'uniform'
        replay_config['dir'] = f'data/{agent.name.lower()}-{env.name.lower()}'
        replay_config['n_steps'] = 1
        replay_config['save'] = True
        replay_config['save_temp'] = True
        replay_config['capacity'] = int(1e6)
        replay_config['has_next_obs'] = True
        replay = create_replay(replay_config)

        def collect(obs, action, reward, discount, next_obs, logpi, **kwargs):
            replay.add(obs=obs,
                       action=action,
                       reward=reward,
                       discount=discount,
                       next_obs=next_obs,
                       logpi=logpi)
    else:

        def collect(**kwargs):
            pass

    if n < env.n_envs:
        n = env.n_envs
    scores, epslens, video = evaluate(env,
                                      agent,
                                      n,
                                      record=record,
                                      size=size,
                                      video_len=video_len,
                                      step_fn=collect)
    pwc(f'After running {n} episodes',
        f'Score: {np.mean(scores):.3g}\tEpslen: {np.mean(epslens):.3g}',
        color='cyan')

    if save:
        replay.save()

    if record:
        save_video(f'{algo_name}-{env_name}', video, fps=fps)
    if use_ray:
        ray.shutdown()