コード例 #1
0
 def run_rl(self):
     '''
     Drive the main RL interaction loop until the env clock reaches clock.max_frame.
     Every pass calls self.try_ckpt, then steps the env once with the agent's action and feeds the transition back to the agent.
     When an episode finishes and frames remain, the env and agent are reset before continuing.
     '''
     trial_index = self.spec["meta"]["trial"]
     logger.info(f'Running RL loop for trial {trial_index} session {self.index}')
     clock = self.env.clock
     state = self.env.reset()
     clock.tick('t')
     self.agent.reset(state)
     done = False
     while True:
         if util.epi_done(done):
             # an episode just ended: checkpoint, then start a fresh one only if frames remain
             logger.nl('A dialog session is done')
             self.try_ckpt(self.agent, self.env)
             if clock.get() < clock.max_frame:
                 clock.tick('epi')
                 state = self.env.reset()
                 self.agent.reset(state)
                 done = False
         self.try_ckpt(self.agent, self.env)
         if clock.get() >= clock.max_frame:
             # frame budget exhausted
             break
         clock.tick('t')
         action = self.agent.act(state)
         next_state, reward, done, _info = self.env.step(action)
         self.agent.update(state, action, reward, next_state, done)
         state = next_state
コード例 #2
0
ファイル: run.py プロジェクト: nlee0212/NeuralPipeline_KoGPT2
def read_spec_and_run(spec_file, spec_name, lab_mode):
    '''
    Resolve a spec from spec_file/spec_name according to lab_mode, then run it.
    Outside TRAIN_MODES, a lab_mode of the form '<mode>@<prename>' is split and the eval spec for prename is loaded instead of the plain spec.
    A spec containing 'spec_params' is expanded into param specs, each run in its own process, in chunks of spec.meta.param_spec_process.
    '''
    logger.info(f'Running lab spec_file:{spec_file} spec_name:{spec_name} in mode:{lab_mode}')
    eval_with_prename = lab_mode not in TRAIN_MODES and '@' in lab_mode
    if eval_with_prename:
        lab_mode, prename = lab_mode.split('@')
        spec = spec_util.get_eval_spec(spec_file, spec_name, prename)
    else:
        spec = spec_util.get(spec_file, spec_name)

    if 'spec_params' not in spec:
        run_spec(spec, lab_mode)
        return

    # parametrized spec: run the param specs in parallel
    param_specs = spec_util.get_param_specs(spec)
    chunk_size = spec['meta']['param_spec_process']
    # Pool cannot spawn nested Process (needed for VecEnv and parallel sessions),
    # so plain Processes are started and joined chunk by chunk
    workers = []
    for param_spec in param_specs:
        workers.append(mp.Process(target=run_spec, args=(param_spec, lab_mode)))
    for worker_chunk in ps.chunk(workers, chunk_size):
        for worker in worker_chunk:
            worker.start()
        for worker in worker_chunk:
            worker.join()
コード例 #3
0
def init_global_nets(algorithm):
    '''
    Build the dict of global (shared-memory) nets for Hogwild from an algorithm instance.
    spec.meta.distributed selects how each trainable net is keyed:
    - 'shared': keyed by the net's own attribute name, so it replaces the local net when set on an algorithm
    - 'synced': keyed as `global_{net}`, so the algorithm keeps a separate reference used for syncing
    Nets without a matching optim attribute are skipped.
    An optim whose class name contains 'Global' is also moved to shared memory and returned together with its lr_scheduler.
    @returns dict: attribute name -> shared net / optim / lr_scheduler
    '''
    dist_mode = algorithm.agent.spec['meta']['distributed']
    assert dist_mode in ('shared', 'synced'), f'Unrecognized distributed mode'
    share_by_name = dist_mode == 'shared'
    global_nets = {}
    for net_name in algorithm.net_names:
        optim_name = net_name.replace('net', 'optim')
        if hasattr(algorithm, optim_name):  # trainable nets only, i.e. those with an optim
            shared_net = getattr(algorithm, net_name)
            shared_net.share_memory()
            net_key = net_name if share_by_name else f'global_{net_name}'
            global_nets[net_key] = shared_net
            optim = getattr(algorithm, optim_name)
            if 'Global' in util.get_class_name(optim):
                # a Global optim overrides the local optim and its scheduler
                optim.share_memory()
                global_nets[optim_name] = optim
                scheduler_name = net_name.replace('net', 'lr_scheduler')
                global_nets[scheduler_name] = getattr(algorithm, scheduler_name)
    attr_names = list(global_nets.keys())
    logger.info(f'Initialized global_nets attr {attr_names} for Hogwild')
    return global_nets
コード例 #4
0
def retro_analyze_sessions(predir):
    '''Run _retro_analyze_session in parallel over every session spec file matching {predir}/*_s*_spec.json'''
    logger.info('Running retro_analyze_sessions')
    spec_paths = glob(f'{predir}/*_s*_spec.json')
    # one single-element args tuple per session spec path
    job_args = [(spec_path, ) for spec_path in spec_paths]
    util.parallelize(_retro_analyze_session, job_args, num_cpus=util.NUM_CPUS)
コード例 #5
0
ファイル: dqn.py プロジェクト: kiseliu/NeuralPipeline_DSTC8
 def train(self):
     '''
     Run one round of Q-network training when self.to_train is set.
     Each of training_iter rounds samples from warmup_memory and from memory (each only once its size reaches its batch_size), then takes training_batch_iter gradient steps per sampled batch.
     @returns float: accumulated loss divided by (training_iter * training_batch_iter); np.nan in eval lab modes or when to_train is not 1
     '''
     if util.in_eval_lab_modes():
         return np.nan
     clock = self.body.env.clock
     if self.to_train != 1:
         return np.nan
     loss_sum = torch.tensor(0.0)
     for _ in range(self.training_iter):
         # (memory, sampler) pairs, warmup first
         sources = (
             (self.body.warmup_memory, self.warmup_sample),
             (self.body.memory, self.sample),
         )
         batches = [draw() for mem, draw in sources if mem.size >= mem.batch_size]
         clock.set_batch_size(sum(map(len, batches)))
         for batch in batches:
             for _ in range(self.training_batch_iter):
                 step_loss = self.calc_q_loss(batch)
                 self.net.train_step(step_loss,
                                     self.optim,
                                     self.lr_scheduler,
                                     clock=clock,
                                     global_net=self.global_net)
                 loss_sum += step_loss
     mean_loss = loss_sum / (self.training_iter * self.training_batch_iter)
     # clear the flag so the next call is a no-op until it is set again
     self.to_train = 0
     logger.info(
         f'Trained {self.name} at epi: {clock.epi}, warmup_size: {self.body.warmup_memory.size}, memory_size: {self.body.memory.size}, loss: {mean_loss:g}'
     )
     return mean_loss.item()
コード例 #6
0
def retro_analyze_trials(predir):
    '''Run _retro_analyze_trial in parallel over every trial spec file under predir, excluding session spec files'''
    logger.info('Running retro_analyze_trials')
    session_spec_paths = glob(f'{predir}/*_s*_spec.json')
    # the trial pattern also matches session specs, so take those out
    trial_pattern_paths = glob(f'{predir}/*_t*_spec.json')
    trial_spec_paths = ps.difference(trial_pattern_paths, session_spec_paths)
    job_args = [(spec_path,) for spec_path in trial_spec_paths]
    util.parallelize(_retro_analyze_trial, job_args, num_cpus=util.NUM_CPUS)
コード例 #7
0
 def save(self, ckpt=None):
     '''
     Save the algorithm's net models via net_util.save_algorithm, forwarding ckpt.
     Only logs a notice when self.net_names has not been declared.
     '''
     if hasattr(self, 'net_names'):
         net_util.save_algorithm(self, ckpt=ckpt)
         return
     logger.info('No net declared in self.net_names in init_nets(); no models to save.')
コード例 #8
0
def set_global_nets(algorithm, global_nets):
    '''
    For Hogwild: attach the attributes built by init_global_nets to an algorithm. Use in algorithm init.
    Every `global_{net}` attribute is first set to None so it always exists, then global_nets (when given) is applied on top.
    '''
    # default every global_{net} to None so it can always be passed into train_step
    for net_name in algorithm.net_names:
        placeholder_attr = f'global_{net_name}'
        setattr(algorithm, placeholder_attr, None)
    if global_nets is None:
        return
    util.set_attr(algorithm, global_nets)
    attr_names = list(global_nets.keys())
    logger.info(f'Set global_nets attr {attr_names} for Hogwild')
コード例 #9
0
 def __init__(self, spec, aeb_space):
     '''
     Register this env space on aeb_space and create one env per entry of spec['env'].
     @param dict:spec the lab spec; len(spec['env']) decides how many envs are made
     @param aeb_space the container this env space attaches itself to; its info_space is reused
     '''
     self.spec = spec
     self.aeb_space = aeb_space
     aeb_space.env_space = self
     self.info_space = aeb_space.info_space
     self.envs = []
     num_envs = len(self.spec['env'])
     for env_index in range(num_envs):
         self.envs.append(make_env(self.spec, env_index, env_space=self))
     logger.info(util.self_desc(self))
コード例 #10
0
ファイル: base.py プロジェクト: sherlock1987/Dp-without-Adv
 def post_init_nets(self):
     '''
     Conditionally load models: in eval lab modes self.load() is called, otherwise only a message is logged.
     Call at the end of init_nets() after setting self.net_names.
     '''
     assert hasattr(self, 'net_names')
     eval_mode = util.in_eval_lab_modes()
     lab_mode = util.get_lab_mode()
     if not eval_mode:
         logger.info(f'Initialized algorithm models for lab_mode: {lab_mode}')
         return
     # the message is logged before the actual load, as in the original flow
     logger.info(f'Loaded algorithm models for lab_mode: {lab_mode}')
     self.load()
0
 def log_summary(self, df_mode):
     '''
     Log the last row of this body's `{df_mode}_df` dataframe as 'key: value' pairs.
     @param str:df_mode 'train' or 'eval'
     '''
     prefix = self.get_log_prefix()
     latest = getattr(self, f'{df_mode}_df').iloc[-1]
     row_str = '  '.join(f'{k}: {v:g}' for k, v in latest.items())
     logger.info(f'{prefix} [{df_mode}_df] {row_str}')
コード例 #12
0
ファイル: base.py プロジェクト: sherlock1987/Dp-without-Adv
 def load(self):
     '''
     Load the algorithm's net models via net_util.load_algorithm (only logs a notice when self.net_names is not declared).
     Afterwards, for every attribute named `*_scheduler` that has an `end_val`, the matching variable on self.body is set to that end value.
     '''
     if hasattr(self, 'net_names'):
         net_util.load_algorithm(self)
     else:
         logger.info('No net declared in self.net_names in init_nets(); no models to load.')
     # pin decayable variables to their final values
     suffix = '_scheduler'
     for attr_name, attr_val in vars(self).items():
         if not attr_name.endswith(suffix):
             continue
         if hasattr(attr_val, 'end_val'):
             setattr(self.body, attr_name.replace(suffix, ''), attr_val.end_val)
コード例 #13
0
 def run_eval(self):
     '''
     Evaluate self.agent on self.eval_env for self.num_eval episodes via analysis.gen_avg_result and log a one-line summary.
     The return is always reported; success rate is added when it is not None; turns, P/R/F1 and book rate are added only when truthy.
     '''
     (avg_return, avg_len, avg_success, avg_p, avg_r, avg_f1,
      avg_book_rate) = analysis.gen_avg_result(self.agent, self.eval_env, self.num_eval)
     parts = [f'{self.num_eval} episodes, {avg_return:.2f} return']
     if avg_success is not None:
         parts.append(f'{avg_success*100:.2f}% success rate')
     if avg_len:
         parts.append(f'{avg_len:.2f} turns')
     if avg_p:
         parts.append(f'{avg_p:.2f} P, {avg_r:.2f} R, {avg_f1:.2f} F1')
     if avg_book_rate:
         parts.append(f'{avg_book_rate*100:.2f}% book rate')
     logger.info(', '.join(parts))
コード例 #14
0
def retro_analyze_experiment(predir):
    '''
    Retro analyze an experiment from the files under predir.
    The experiment spec is the `*_spec.json` file that does not match the trial pattern `*_t*_spec.json`.
    Analysis only runs when an experiment spec exists and its `{info_prepath}_trial_data_dict.json` file is present, i.e. when an experiment had actually been run; otherwise this returns without doing anything.
    @param str:predir directory holding the spec and data files
    '''
    logger.info('Running retro_analyze_experiment')
    trial_spec_paths = glob(f'{predir}/*_t*_spec.json')
    # remove trial and session spec paths
    experiment_spec_paths = ps.difference(glob(f'{predir}/*_spec.json'), trial_spec_paths)
    if not experiment_spec_paths:
        return  # no experiment spec in predir, nothing to analyze
    experiment_spec_path = experiment_spec_paths[0]
    spec = util.read(experiment_spec_path)
    info_prepath = spec['meta']['info_prepath']
    trial_data_path = f'{info_prepath}_trial_data_dict.json'
    # the original check was inverted: it returned when the file existed and tried to read it when missing
    if not os.path.exists(trial_data_path):
        return  # only run analysis if experiment had been ran
    trial_data_dict = util.read(trial_data_path)
    analysis.analyze_experiment(spec, trial_data_dict)
コード例 #15
0
def analyze_trial(trial_spec, session_metrics_list):
    '''
    Compute trial metrics from the session metrics, plot them, and return them.
    In 'train' lab mode the predir derived from the trial's info_prepath is additionally zipped to `{predir}.zip`.
    '''
    info_prepath = trial_spec['meta']['info_prepath']
    trial_metrics = calc_trial_metrics(session_metrics_list, info_prepath)
    viz.plot_trial(trial_spec, trial_metrics)
    if util.get_lab_mode() != 'train':
        return trial_metrics
    # archive the whole data directory next to itself
    predir, _, _, _, _, _ = util.prepath_split(info_prepath)
    shutil.make_archive(predir, 'zip', predir)
    logger.info(f'All trial data zipped to {predir}.zip')
    return trial_metrics
コード例 #16
0
def analyze_experiment(spec, trial_data_dict):
    '''
    Persist trial_data_dict to `{info_prepath}_trial_data_dict.json`, compute and plot the experiment dataframe, then zip the predir.
    @returns the experiment dataframe produced by calc_experiment_df
    '''
    info_prepath = spec['meta']['info_prepath']
    trial_data_path = f'{info_prepath}_trial_data_dict.json'
    util.write(trial_data_dict, trial_data_path)
    df = calc_experiment_df(trial_data_dict, info_prepath)
    viz.plot_experiment(spec, df, METRICS_COLS)
    # archive the whole data directory next to itself
    predir, _, _, _, _, _ = util.prepath_split(info_prepath)
    shutil.make_archive(predir, 'zip', predir)
    logger.info(f'All experiment data zipped to {predir}.zip')
    return df
コード例 #17
0
    def __init__(self, spec, e=None):
        '''
        Set up the MultiWoz env wrapper: read observation_dim/action_dim from env_spec (both default to 0), create the underlying MultiWozEnvironment, and expose its evaluator.
        '''
        super(MultiWozEnv, self).__init__(spec, e)
        # defaults, overridden below by whatever env_spec provides
        self.action_dim = 0
        self.observation_dim = 0
        util.set_attr(self, self.env_spec, ['observation_dim', 'action_dim'])
        # worker id: last 4 characters of the pid concatenated with (e + a unique id)
        id_digits = f'{os.getpid()}{self.e+int(ps.unique_id())}'
        worker_id = int(id_digits[-4:])
        self.u_env = MultiWozEnvironment(self.env_spec, worker_id, self.action_dim)
        self.evaluator = self.u_env.evaluator
        self.patch_gym_spaces(self.u_env)
        self._set_attr_from_u_env(self.u_env)

        logger.info(util.self_desc(self))
コード例 #18
0
def retro_analyze(predir):
    '''
    Method to analyze experiment/trial from files after it ran.
    Runs the session, trial and experiment retro-analyses on predir in that order, logging to `{predir}/log/retro_analyze`.
    @param str:predir data directory; trailing slashes are ignored
    @example

    yarn retro_analyze data/reinforce_cartpole_2018_01_22_211751/
    '''
    # only trim trailing slashes: strip('/') would also drop the leading '/' and turn an absolute path into a relative one
    predir = predir.rstrip('/')
    os.environ['LOG_PREPATH'] = f'{predir}/log/retro_analyze'  # to prevent overwriting log file
    logger.info(f'Running retro-analysis on {predir}')
    retro_analyze_sessions(predir)
    retro_analyze_trials(predir)
    retro_analyze_experiment(predir)
    logger.info('Finished retro-analysis')
コード例 #19
0
 def __init__(self, spec, body, a=None, global_nets=None):
     '''
     Build an agent from spec['agent'][a] and attach it to body.
     Optional pipeline components (nlu, dst / word_dst, state_encoder, action_decoder, nlg) are created only when the agent spec has the matching key: the entry's 'name' selects the class from the module of the same name and the remaining entries are passed as constructor kwargs.
     The algorithm is always created; a memory is attached to body only when the agent spec has a truthy 'memory' entry.
     @param dict:spec the lab spec
     @param body the body this agent controls; body.agent is set to this agent
     @param int:a index into spec['agent'], defaults to 0
     @param global_nets forwarded to the algorithm; must not be a list
     '''
     self.spec = spec
     self.a = a or 0  # for compatibility with agent_space
     self.agent_spec = spec['agent'][self.a]
     self.name = self.agent_spec['name']
     assert not ps.is_list(
         global_nets
     ), f'single agent global_nets must be a dict, got {global_nets}'
     # each optional component: deepcopy the spec entry so popping 'name' does not mutate agent_spec
     self.nlu = None
     if 'nlu' in self.agent_spec:
         params = deepcopy(ps.get(self.agent_spec, 'nlu'))
         NluClass = getattr(nlu, params.pop('name'))
         self.nlu = NluClass(**params)
     self.dst = None
     if 'dst' in self.agent_spec:
         params = deepcopy(ps.get(self.agent_spec, 'dst'))
         DstClass = getattr(dst, params.pop('name'))
         self.dst = DstClass(**params)
     # a 'word_dst' entry also fills self.dst, replacing any 'dst' built above
     if 'word_dst' in self.agent_spec:
         params = deepcopy(ps.get(self.agent_spec, 'word_dst'))
         DstClass = getattr(word_dst, params.pop('name'))
         self.dst = DstClass(**params)
     self.state_encoder = None
     if 'state_encoder' in self.agent_spec:
         params = deepcopy(ps.get(self.agent_spec, 'state_encoder'))
         StateEncoderClass = getattr(state_encoder, params.pop('name'))
         self.state_encoder = StateEncoderClass(**params)
     self.action_decoder = None
     if 'action_decoder' in self.agent_spec:
         params = deepcopy(ps.get(self.agent_spec, 'action_decoder'))
         ActionDecoderClass = getattr(action_decoder, params.pop('name'))
         self.action_decoder = ActionDecoderClass(**params)
     self.nlg = None
     if 'nlg' in self.agent_spec:
         params = deepcopy(ps.get(self.agent_spec, 'nlg'))
         NlgClass = getattr(nlg, params.pop('name'))
         self.nlg = NlgClass(**params)
     # link agent and body both ways before the algorithm is built
     self.body = body
     body.agent = self
     AlgorithmClass = getattr(algorithm,
                              ps.get(self.agent_spec, 'algorithm.name'))
     self.algorithm = AlgorithmClass(self, global_nets)
     # memory is optional here and is created after the algorithm
     if ps.get(self.agent_spec, 'memory'):
         MemoryClass = getattr(memory, ps.get(self.agent_spec,
                                              'memory.name'))
         self.body.memory = MemoryClass(self.agent_spec['memory'],
                                        self.body)
     # falls back to -1 when algorithm.warmup_epi is missing or falsy (including 0)
     self.warmup_epi = ps.get(self.agent_spec, 'algorithm.warmup_epi') or -1
     self.body.state, self.body.encoded_state, self.body.action = None, None, None
     logger.info(util.self_desc(self))
コード例 #20
0
 def __init__(self, agent, global_nets=None):
     '''
     Bind the algorithm to its agent, read its spec sections, then initialize its parameters and nets.
     @param {*} agent is the container for algorithm and related components, and interfaces with env.
     @param global_nets forwarded to self.init_nets
     '''
     self.agent = agent
     agent_spec = agent.agent_spec
     self.algorithm_spec = agent_spec['algorithm']
     self.name = self.algorithm_spec['name']
     self.net_spec = agent_spec.get('net')
     # memory_spec is only defined when the agent spec has a truthy 'memory' entry
     if ps.get(agent_spec, 'memory'):
         self.memory_spec = agent_spec['memory']
     self.body = agent.body
     self.init_algorithm_params()
     self.init_nets(global_nets)
     logger.info(util.self_desc(self))
コード例 #21
0
    def __init__(self, spec, global_nets=None):
        '''
        Set up a session: seed, CUDA id and logger from spec, save the session spec, then create the agent/env pair plus a separate env made under the 'eval' lab mode context.
        @param dict:spec the lab spec; spec.meta.session is the session index
        @param global_nets forwarded to make_agent_env
        '''
        self.spec = spec
        self.index = spec['meta']['session']
        util.set_random_seed(spec)
        util.set_cuda_id(spec)
        util.set_logger(spec, logger, 'session')
        spec_util.save(spec, unit='session')

        agent, env = make_agent_env(spec, global_nets)
        self.agent, self.env = agent, env
        with util.ctx_lab_mode('eval'):
            # the eval env is shared with the agent's body
            eval_env = make_env(spec)
            self.eval_env = eval_env
            agent.body.eval_env = eval_env
        self.num_eval = ps.get(agent.spec, 'meta.num_eval')
        # falls back to -1 when algorithm.warmup_epi is missing or falsy
        self.warmup_epi = ps.get(agent.agent_spec, 'algorithm.warmup_epi') or -1
        logger.info(util.self_desc(self))
コード例 #22
0
    def __init__(self, spec, body, a=None, global_nets=None):
        '''
        Build an agent from spec['agent'][a]: attach it to body, give body a memory, then create the algorithm.
        @param dict:spec the lab spec
        @param body the body this agent controls; body.agent is set to this agent
        @param int:a agent index for multi-agent, defaults to 0
        @param global_nets forwarded to the algorithm; must not be a list
        '''
        self.spec = spec
        self.a = a or 0
        agent_spec = spec['agent'][self.a]
        self.agent_spec = agent_spec
        self.name = agent_spec['name']
        assert not ps.is_list(global_nets), f'single agent global_nets must be a dict, got {global_nets}'
        # wire up components: body <-> agent, memory on body, then algorithm
        self.body = body
        body.agent = self
        memory_name = ps.get(agent_spec, 'memory.name')
        MemoryClass = getattr(memory, memory_name)
        body.memory = MemoryClass(agent_spec['memory'], body)
        algorithm_name = ps.get(agent_spec, 'algorithm.name')
        AlgorithmClass = getattr(algorithm, algorithm_name)
        self.algorithm = AlgorithmClass(self, global_nets)

        logger.info(util.self_desc(self))
コード例 #23
0
    def check_fn(*args, **kwargs):
        '''
        Wrapper around the enclosing scope's `fn` (a train_step-like call whose first positional arg is the net).
        When to_check_train_step() is falsy, fn is called directly and its result returned.
        Otherwise fn is run and its loss is checked for NaN, for whether parameters changed, and for gradient norms lying strictly between 0 and 1e5.
        Apart from the NaN assert and the zero-loss assert, failures are logged rather than raised; a failed parameter-update check is re-raised only when PY_ENV == 'test'.
        @returns the loss returned by fn
        '''
        if not to_check_train_step():
            return fn(*args, **kwargs)

        net = args[0]  # first arg self
        # get pre-update parameters to compare
        pre_params = [param.clone() for param in net.parameters()]

        # run train_step, get loss
        loss = fn(*args, **kwargs)
        assert not torch.isnan(loss).any(), loss

        # get post-update parameters to compare
        post_params = [param.clone() for param in net.parameters()]
        if loss == 0.0:
            # if loss is 0, there should be no updates
            # TODO if without momentum, parameters should not change too
            # NOTE(review): this assert is not wrapped in try/except, so it raises directly; it also assumes param.grad is not None
            for p_name, param in net.named_parameters():
                assert param.grad.norm() == 0
        else:
            # check parameter updates
            try:
                # fails only when every parameter tensor is unchanged
                assert not all(
                    torch.equal(w1, w2)
                    for w1, w2 in zip(pre_params, post_params)
                ), f'Model parameter is not updated in train_step(), check if your tensor is detached from graph. Loss: {loss:g}'
                logger.info(
                    f'Model parameter is updated in train_step(). Loss: {loss: g}'
                )
            except Exception as e:
                logger.error(e)
                if os.environ.get('PY_ENV') == 'test':
                    # raise error if in unit test
                    raise (e)

            # check grad norms
            min_norm, max_norm = 0.0, 1e5
            for p_name, param in net.named_parameters():
                try:
                    grad_norm = param.grad.norm()
                    assert min_norm < grad_norm < max_norm, f'Gradient norm for {p_name} is {grad_norm:g}, fails the extreme value check {min_norm} < grad_norm < {max_norm}. Loss: {loss:g}. Check your network and loss computation.'
                except Exception as e:
                    # any failure here (including a missing grad) is only logged as a warning
                    logger.warning(e)
            # NOTE(review): this line is logged even when warnings were emitted in the loop above
            logger.info(f'Gradient norms passed value check.')
        logger.debug('Passed network parameter update check.')
        # store grad norms for debugging
        net.store_grad_norms()
        return loss
コード例 #24
0
def check_all():
    '''
    Check every spec in every '.json' file of SPEC_DIR whose name does not start with '_'.
    A failing spec is logged with its file name and the exception is re-raised.
    @returns bool: True when all specs pass
    '''
    spec_files = [
        f for f in os.listdir(SPEC_DIR)
        if f.endswith('.json') and not f.startswith('_')
    ]
    for spec_file in spec_files:
        spec_dict = util.read(f'{SPEC_DIR}/{spec_file}')
        for spec_name, spec in spec_dict.items():
            # name and meta are filled in at runtime before checking
            spec['name'] = spec_name
            spec = extend_meta_spec(spec)
            try:
                check(spec)
            except Exception:
                logger.exception(f'spec_file {spec_file} fails spec check')
                raise
    logger.info(f'Checked all specs from: {ps.join(spec_files, ",")}')
    return True
コード例 #25
0
ファイル: ppo.py プロジェクト: sherlock1987/SeqReward
 def airl_train(self, training_times=1):
     '''
     Train the discriminator on the most recent entry of self.experience_buffer.
     Each of the training_times rounds splits that batch into minibatches (split size 64), takes one optimizer step per minibatch, clamps every discriminator parameter to [-0.1, 0.1], and logs the mean minibatch loss.
     '''
     for _ in range(training_times):
         latest_batch = self.experience_buffer[-1]
         minibatches = util.split_minibatch(latest_batch, 64)
         loss_sum = 0
         for fake_batch in minibatches:
             self.optim_disc.zero_grad()
             disc_loss = self.discriminator.disc_train(fake_batch)
             loss_sum += disc_loss.item()
             disc_loss.backward()
             self.optim_disc.step()
             # weight clipping after every step
             for param in self.discriminator.parameters():
                 param.data.clamp_(-0.1, 0.1)
         logger.info(f"airl training loss: {loss_sum/len(minibatches)}")
コード例 #26
0
 def train(self):
     '''
     Run one policy-gradient training step when self.to_train is set: sample a batch, compute pdparams and advantages, compute the policy loss and take a train_step.
     @returns float: the loss value; np.nan in eval lab modes or when to_train is not 1
     '''
     if util.in_eval_lab_modes():
         return np.nan
     clock = self.body.env.clock
     if self.to_train != 1:
         return np.nan
     batch = self.sample()
     clock.set_batch_size(len(batch))
     pdparams = self.calc_pdparam_batch(batch)
     advs = self.calc_ret_advs(batch)
     policy_loss = self.calc_policy_loss(batch, pdparams, advs)
     self.net.train_step(policy_loss, self.optim, self.lr_scheduler, clock=clock, global_net=self.global_net)
     # clear the flag so the next call is a no-op until it is set again
     self.to_train = 0
     logger.info(f'Trained {self.name} at epi: {clock.epi}, frame: {clock.frame}, t: {clock.t}, total_reward so far: {self.body.total_reward}, loss: {policy_loss:g}')
     return policy_loss.item()
コード例 #27
0
def load_algorithm(algorithm):
    '''
    Load all the nets of an algorithm, plus each net's optim when the algorithm has one.
    Files are read from `{prepath}_{net_name}_model.pt` / `{prepath}_{net_name}_optim.pt`, where prepath is spec.meta.eval_model_prepath in eval lab modes and spec.meta.model_prepath otherwise.
    '''
    agent = algorithm.agent
    net_names = algorithm.net_names
    # eval modes load a specific model
    prepath_key = 'eval_model_prepath' if util.in_eval_lab_modes() else 'model_prepath'
    model_prepath = agent.spec['meta'][prepath_key]
    logger.info(f'Loading algorithm {util.get_class_name(algorithm)} nets {net_names} from {model_prepath}_*.pt')
    for net_name in net_names:
        load(getattr(algorithm, net_name), f'{model_prepath}_{net_name}_model.pt')
        optim = getattr(algorithm, net_name.replace('net', 'optim'), None)
        if optim is None:
            continue  # non-trainable net, nothing more to load
        load(optim, f'{model_prepath}_{net_name}_optim.pt')
コード例 #28
0
def run_ray_search(spec):
    '''
    Run a ray tune search for an experiment spec and collect the trial data.
    spec.meta.max_trial samples are run; each trial receives the spec, a running trial index, and the config space built from the spec.
    TODO support for other ray search algorithms: https://ray.readthedocs.io/en/latest/tune-searchalg.html
    @returns dict: the merged 'trial_data' of every ray trial's last result, for Lab Experiment to analyze
    '''
    logger.info(f'Running ray search for spec {spec["name"]}')
    # module-level counter so gen_trial_index can be passed into ray.run
    global trial_index
    trial_index = -1

    def gen_trial_index():
        global trial_index
        trial_index += 1
        return trial_index

    ray.init()

    search_config = {
        "spec": spec,
        "trial_index": tune.sample_from(lambda spec: gen_trial_index()),
    }
    search_config.update(build_config_space(spec))
    ray_trials = tune.run(
        ray_trainable,
        name=spec['name'],
        config=search_config,
        resources_per_trial=infer_trial_resources(spec),
        num_samples=spec['meta']['max_trial'],
        queue_trials=True,
    )
    trial_data_dict = {}
    for finished_trial in ray_trials:
        trial_data_dict.update(finished_trial.last_result['trial_data'])

    ray.shutdown()
    return trial_data_dict
コード例 #29
0
 def train(self):
     '''
     Run one round of Q-network training when self.to_train is set, and log the average IRL reward.
     Each of training_iter rounds samples from warmup_memory and from memory (each only once its size reaches its batch_size), then takes training_batch_iter gradient steps per sampled batch.
     self.reward_count and self.batch_count are reset to 0 here and are presumably updated inside calc_q_loss (not visible in this block); their ratio is logged as irl_reward, or np.nan when batch_count stayed 0 because nothing was sampled.
     @returns float: accumulated loss divided by (training_iter * training_batch_iter); np.nan in eval lab modes or when to_train is not 1
     '''
     if util.in_eval_lab_modes():
         return np.nan
     clock = self.body.env.clock
     if self.to_train == 1:
         self.reward_agent.eval()
         total_loss = torch.tensor(0.0)
         self.reward_count = 0
         self.batch_count = 0
         for _ in range(self.training_iter):
             batches = []
             if self.body.warmup_memory.size >= self.body.warmup_memory.batch_size:
                 batches.append(self.warmup_sample())
             if self.body.memory.size >= self.body.memory.batch_size:
                 batches.append(self.sample())
             clock.set_batch_size(sum(len(batch) for batch in batches))
             for batch in batches:
                 for _ in range(self.training_batch_iter):
                     loss = self.calc_q_loss(batch, False)
                     self.net.train_step(loss,
                                         self.optim,
                                         self.lr_scheduler,
                                         clock=clock,
                                         global_net=self.global_net)
                     total_loss += loss
         loss = total_loss / (self.training_iter * self.training_batch_iter)
         # guard the ratio: with no sampled batch, batch_count is still 0 and the division would raise
         if self.batch_count:
             reward_irl = self.reward_count / self.batch_count
         else:
             reward_irl = np.nan
         logger.info("***********")
         logger.info(reward_irl)
         # reset
         self.to_train = 0
         logger.info(
             f'Trained {self.name} at epi: {clock.epi}, warmup_size: {self.body.warmup_memory.size}, memory_size: {self.body.memory.size}, loss: {loss:g}, irl_reward: {reward_irl}'
         )
         return loss.item()
     else:
         return np.nan
コード例 #30
0
 def log_metrics(self, metrics, df_mode):
     '''
     Log session metrics as 'key: value' pairs, tagged with this body's log prefix and df_mode.
     @param dict:metrics metric name -> numeric value
     @param str:df_mode used only in the log tag `[{df_mode}_df metrics]`
     '''
     prefix = self.get_log_prefix()
     row_str = '  '.join(f'{name}: {value:g}' for name, value in metrics.items())
     logger.info(f'{prefix} [{df_mode}_df metrics] {row_str}')