def probe_conv(model):
    """Show the first 128 filters of a saved model's first image convolution.

    The model is loaded with ``utils.load_model``; the ``weight`` entry of the
    state dict of submodule ``'0'`` of ``image_conv`` is clipped to
    [-0.1, 0.1] and drawn as an 8 x 16 grid of images in a blocking
    matplotlib window.

    Args:
        model: model name, passed unchanged to ``utils.load_model``.
    """
    n_rows, n_cols = 8, 16

    net = utils.load_model(model)
    first_layer = net.image_conv.get_submodule('0')
    kernels = first_layer.state_dict()['weight']
    kernels = torch.clip(kernels, -.1, .1).cpu().detach().numpy()

    fig, axs = plt.subplots(n_rows, n_cols, sharex=True, sharey=True)
    fig.subplots_adjust(hspace=0, wspace=0)

    for idx in range(n_rows * n_cols):
        row, col = divmod(idx, n_cols)
        # Move the leading axis last for imshow, then map the clipped
        # range [-0.1, 0.1] onto [0, 1].
        tile = np.moveaxis(kernels[idx], 0, -1) * 5 + .5
        axs[row][col].imshow(tile)

    plt.show()
utils.configure_logging(args.model) logger = logging.getLogger(__name__) # Define obss preprocessor if 'emb' in args.arch: obss_preprocessor = utils.IntObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model) else: obss_preprocessor = utils.ObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model) # Define actor-critic model acmodel = utils.load_model(args.model, raise_not_found=False) if acmodel is None: if args.pretrained_model: acmodel = utils.load_model(args.pretrained_model, raise_not_found=True) else: acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space, args.image_dim, args.memory_dim, args.instr_dim, not args.no_desc, args.instr_arch, not args.no_mem, args.arch, random_shuffled=args.random_shuffle, instr_sents=n_floor_colors,
def main():
    """Train an actor-critic agent with PPO and periodically checkpoint it.

    Configuration is read from the module-level ``args`` namespace. The
    function builds ``args.procs`` environments, resolves the model name
    (stored back into ``args.model``), creates or restores the model, then
    runs PPO updates until ``status['num_frames']`` reaches ``args.frames``.

    Progress is written to the logger, to ``log.csv`` in the model's log
    directory and, when ``args.tb`` is set, to TensorBoard. Every
    ``args.save_interval`` updates the training status and model are saved,
    the model is evaluated, and it is additionally stored under
    ``<model>_best`` when it beats the best evaluation seen so far in this
    run (higher success rate, ties broken by higher mean return).

    Raises:
        ValueError: if ``args.algo`` is not ``"ppo"``.
    """
    # Generate environments
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix,
    }
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(
        **model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(**model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    def log_git(git_args, title, failure_message, skip_empty=False):
        """Log the output of ``git <git_args>`` run inside the babyai package directory."""
        try:
            # An argv list with cwd= avoids building a shell string from a path.
            output = subprocess.check_output(
                ['git'] + git_args, cwd=babyai_code).decode('utf-8')
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing git executable or package directory.
            logger.info(failure_message)
            return
        if skip_empty and not output:
            return
        logger.info(title)
        logger.info(output)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(
            args.model, envs[0].observation_space, args.pretrained_model)
    else:
        obss_preprocessor = utils.ImgInstrObssPreprocessor(
            args.model, envs[0].observation_space)

    # Define actor-critic model
    acmodel = utils.load_model(args.model, raise_not_found=False)
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
        else:
            acmodel = ACModelImgInstr(
                obss_preprocessor.obs_space, envs[0].action_space,
                args.image_dim, args.memory_dim, args.instr_dim,
                not args.no_instr, not args.no_mem, args.arch)
    # Vocabulary saving is disabled in this variant; only the model is saved.
    utils.save_model(acmodel, args.model)

    if torch.cuda.is_available():
        acmodel.cuda()

    # Define actor-critic algo
    def reshape_reward(_0, _1, reward, _2):
        return args.reward_scale * reward

    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(
            envs, acmodel, args.frames_per_proc, args.discount, args.lr,
            args.beta1, args.beta2, args.gae_lambda, args.entropy_coef,
            args.value_loss_coef, args.max_grad_norm, args.recurrence,
            args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size,
            obss_preprocessor, reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When using extra binary information, more tensors (model params) are
    # initialized compared to when we don't use that. Thus, there starts to be
    # a difference in the random state. If we want to avoid it, in order to
    # make sure that the results of supervised-loss-coef=0. and
    # extra-binary-info=0 match, we need to reseed here.
    utils.seed(args.seed)

    # Restore training status
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

    # Define logger and Tensorboard writer and CSV writer
    header = (["update", "episodes", "frames", "FPS", "duration"]
              + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["success_rate"]
              + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)

    # The CSV log is line-buffered (buffering=1): one update is assumed to
    # take much longer than one write to the log. The `with` block guarantees
    # the handle is closed even if training raises.
    with open(csv_path, 'a', 1) as csv_file:
        csv_writer = csv.writer(csv_file)
        if first_created:
            csv_writer.writerow(header)

        # Log code state, command, availability of CUDA and model
        babyai_code = list(babyai.__path__)[0]
        log_git(['log', '-n1'], 'LAST COMMIT INFO:',
                'Could not figure out the last commit')
        log_git(['diff'], 'GIT DIFF:',
                'Could not figure out the git diff', skip_empty=True)
        logger.info('COMMAND LINE ARGS:')
        logger.info(args)
        logger.info("CUDA available: {}".format(torch.cuda.is_available()))
        logger.info(acmodel)

        # Train model
        total_start_time = time.time()
        best_success_rate = 0
        best_mean_return = 0
        test_env_name = args.env
        while status['num_frames'] < args.frames:
            # Update parameters
            update_start_time = time.time()
            logs = algo.update_parameters()
            update_end_time = time.time()

            status['num_frames'] += logs["num_frames"]
            status['num_episodes'] += logs['episodes_done']
            status['i'] += 1

            # Print logs
            if status['i'] % args.log_interval == 0:
                total_ellapsed_time = int(time.time() - total_start_time)
                fps = logs["num_frames"] / (update_end_time - update_start_time)
                return_per_episode = utils.synthesize(logs["return_per_episode"])
                success_per_episode = utils.synthesize(
                    [1 if r > 0 else 0 for r in logs["return_per_episode"]])
                num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

                data = [status['i'], status['num_episodes'], status['num_frames'],
                        fps, total_ellapsed_time,
                        *return_per_episode.values(),
                        success_per_episode['mean'],
                        *num_frames_per_episode.values(),
                        logs["entropy"], logs["value"], logs["policy_loss"],
                        logs["value_loss"], logs["loss"], logs["grad_norm"]]

                format_str = ("U {} | E {} | F {:06} | FPS {:04.0f} | D {} | R:xsmM {: .2f} {: .2f} {: .2f} {: .2f} | "
                              "S {:.2f} | F:xsmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | "
                              "pL {: .3f} | vL {:.3f} | L {:.3f} | gN {:.3f} | ")

                logger.info(format_str.format(*data))
                if args.tb:
                    assert len(header) == len(data)
                    for key, value in zip(header, data):
                        writer.add_scalar(key, float(value), status['num_frames'])

                csv_writer.writerow(data)

            # Save training status and model
            if args.save_interval > 0 and status['i'] % args.save_interval == 0:
                with open(status_path, 'w') as dst:
                    json.dump(status, dst)
                utils.save_model(acmodel, args.model)

                # Evaluate the freshly saved model (greedy actions).
                agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
                agent.model = acmodel
                agent.model.eval()
                eval_logs = batch_evaluate(
                    agent, test_env_name, args.val_seed, args.val_episodes)
                agent.model.train()
                mean_return = np.mean(eval_logs["return_per_episode"])
                success_rate = np.mean(
                    [1 if r > 0 else 0 for r in eval_logs['return_per_episode']])

                improved = success_rate > best_success_rate or (
                    success_rate == best_success_rate
                    and mean_return > best_mean_return)
                if improved:
                    # Record both metrics of the new best checkpoint. Updating
                    # only the success rate would leave a stale mean return
                    # as the tie-break baseline for later evaluations.
                    best_success_rate = success_rate
                    best_mean_return = mean_return
                    utils.save_model(acmodel, args.model + '_best')
                    logger.info("Return {: .2f}; best model is saved".format(mean_return))
                else:
                    logger.info("Return {: .2f}; not the best model; not saved".format(mean_return))
def __init__(self, args):
    """Load demonstrations, build or restore the model, and set up the optimizer.

    When ``args.multi_env`` is set, one environment is created per entry and
    the train/validation demos of every entry in ``args.multi_demos`` are
    concatenated. Otherwise a single environment ``args.env`` is used with the
    demos located through ``args.demos`` / ``args.demos_origin``.

    The model named ``args.model`` is loaded if it exists; otherwise it is
    taken from ``args.pretrained_model`` when that attribute is set, or a new
    ``ACModel`` is created. The vocabulary and the model are then saved under
    ``args.model``.

    Args:
        args: namespace with the training configuration.

    Raises:
        ValueError: if more training episodes are requested than the loaded
            demos contain.
    """
    self.args = args

    utils.seed(self.args.seed)

    # args.env is a list when training on multiple environments
    if getattr(args, 'multi_env', None):
        self.env = [gym.make(item) for item in args.multi_env]

        self.train_demos = []
        for demos, episodes in zip(args.multi_demos, args.multi_episodes):
            demos_path = utils.get_demos_path(demos, None, None, valid=False)
            logger.info('loading {} of {} demos'.format(episodes, demos))
            train_demos = utils.load_demos(demos_path)
            logger.info('loaded demos')
            if episodes > len(train_demos):
                raise ValueError(
                    "there are only {} train demos in {}".format(
                        len(train_demos), demos))
            self.train_demos.extend(train_demos[:episodes])
            logger.info('So far, {} demos loaded'.format(
                len(self.train_demos)))

        self.val_demos = []
        # Every environment contributes at most args.val_episodes valid demos.
        for demos in args.multi_demos:
            episodes = args.val_episodes
            demos_path_valid = utils.get_demos_path(demos, None, None, valid=True)
            logger.info('loading {} of {} valid demos'.format(
                episodes, demos))
            valid_demos = utils.load_demos(demos_path_valid)
            logger.info('loaded demos')
            if episodes > len(valid_demos):
                logger.info(
                    'Using all the available {} demos to evaluate valid. accuracy'
                    .format(len(valid_demos)))
            self.val_demos.extend(valid_demos[:episodes])
            logger.info('So far, {} valid demos loaded'.format(
                len(self.val_demos)))

        logger.info('Loaded all demos')

        observation_space = self.env[0].observation_space
        action_space = self.env[0].action_space

    else:
        self.env = gym.make(self.args.env)

        demos_path = utils.get_demos_path(
            args.demos, args.env, args.demos_origin, valid=False)
        demos_path_valid = utils.get_demos_path(
            args.demos, args.env, args.demos_origin, valid=True)

        logger.info('loading demos')
        self.train_demos = utils.load_demos(demos_path)
        # Debug-level count only: printing the first demo raised IndexError
        # on an empty demo file and flooded stdout otherwise.
        logger.debug('loaded %d train demos', len(self.train_demos))
        logger.info('loaded demos')
        if args.episodes:
            if args.episodes > len(self.train_demos):
                raise ValueError("there are only {} train demos".format(
                    len(self.train_demos)))
            self.train_demos = self.train_demos[:args.episodes]

        self.val_demos = utils.load_demos(demos_path_valid)
        if args.val_episodes > len(self.val_demos):
            logger.info(
                'Using all the available {} demos to evaluate valid. accuracy'
                .format(len(self.val_demos)))
        self.val_demos = self.val_demos[:self.args.val_episodes]

        observation_space = self.env.observation_space
        action_space = self.env.action_space

    logger.debug('model name: %s', args.model)

    self.obss_preprocessor = utils.ObssPreprocessor(
        args.model, observation_space,
        getattr(self.args, 'pretrained_model', None))

    # Define actor-critic model
    self.acmodel = utils.load_model(args.model, raise_not_found=False)
    if self.acmodel is None:
        if getattr(self.args, 'pretrained_model', None):
            self.acmodel = utils.load_model(args.pretrained_model,
                                            raise_not_found=True)
        else:
            self.acmodel = ACModel(
                self.obss_preprocessor.obs_space, action_space,
                args.image_dim, args.memory_dim, args.instr_dim,
                not self.args.no_instr, self.args.instr_arch,
                not self.args.no_mem, self.args.arch)
    self.obss_preprocessor.vocab.save()
    utils.save_model(self.acmodel, args.model)

    self.acmodel.train()
    if torch.cuda.is_available():
        self.acmodel.cuda()

    self.optimizer = torch.optim.Adam(self.acmodel.parameters(),
                                      self.args.lr,
                                      eps=self.args.optim_eps)
    # Multiply the learning rate by 0.9 every 100 scheduler steps.
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=100,
                                                     gamma=0.9)

    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
envs[0].observation_space, args.pretrained_model) else: if 'gnn' in args.arch: obss_preprocessor = utils.GraphObssPreprocessor( args.model, envs[0].observation_space, args.pretrained_model) if 'gnn_dense' in args.arch: obss_preprocessor = utils.GraphObssPreprocessorDense( args.model, envs[0].observation_space, args.pretrained_model) else: obss_preprocessor = utils.ObssPreprocessor(args.model, envs[0].observation_space, args.pretrained_model) # Define actor-critic model acmodel = utils.load_model(args.model, raise_not_found=False) acmodel = ACModelGNNDense(obss_preprocessor.obs_space, envs[0].action_space, args.image_dim, args.memory_dim, args.instr_dim, not args.no_instr, args.instr_arch, not args.no_mem, args.arch) obss_preprocessor.vocab.save() utils.save_model(acmodel, args.model) # if torch.cuda.is_available(): # acmodel.cuda() # Define actor-critic algo reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
def main(exp, argv):
    """Run PPO updates under the experiment's iteration wrapper.

    Sets ``BABYAI_STORAGE`` to ``exp.results_directory()``, parses ``argv``,
    builds the environments and the actor-critic model, then repeatedly calls
    ``algo.update_parameters()`` inside ``iteration_wrapper`` until
    ``wrapper.done()`` is true, reporting the frame count and loss of each
    update through the wrapper.

    Args:
        exp: experiment handle; provides ``results_directory()`` and is
            passed to ``iteration_wrapper``.
        argv: command-line argument list handed to the parser.

    Raises:
        ValueError: if ``args.algo`` is not ``"ppo"``.
    """
    os.environ["BABYAI_STORAGE"] = exp.results_directory()

    # Parse arguments
    # NOTE(review): options such as --seed, --env, --procs and --model are
    # read below but not declared here; presumably this ArgumentParser is the
    # project's subclass that predefines them. Confirm.
    parser = ArgumentParser()
    parser.add_argument("--algo", default='ppo',
                        help="algorithm to use (default: ppo)")
    parser.add_argument("--discount", type=float, default=0.99,
                        help="discount factor (default: 0.99)")
    parser.add_argument("--reward-scale", type=float, default=20.,
                        help="Reward scale multiplier")
    parser.add_argument("--gae-lambda", type=float, default=0.99,
                        help="lambda coefficient in GAE formula (default: 0.99, 1 means no gae)")
    parser.add_argument("--value-loss-coef", type=float, default=0.5,
                        help="value loss term coefficient (default: 0.5)")
    parser.add_argument("--max-grad-norm", type=float, default=0.5,
                        help="maximum norm of gradient (default: 0.5)")
    parser.add_argument("--clip-eps", type=float, default=0.2,
                        help="clipping epsilon for PPO (default: 0.2)")
    parser.add_argument("--ppo-epochs", type=int, default=4,
                        help="number of epochs for PPO (default: 4)")
    parser.add_argument("--save-interval", type=int, default=50,
                        help="number of updates between two saves (default: 50, 0 means no saving)")
    parser.add_argument("--workers", type=int, default=8,
                        help="number of workers for PyTorch (default: 8)")
    parser.add_argument("--max-count", type=int, default=1000,
                        help="maximum number of frames to run for")
    parser.add_argument("--sample_duration", type=float, default=0.5,
                        help="sampling duration")
    parser.add_argument("--cuda", action="store_true", default=False,
                        help="whether to use cuda")
    args = parser.parse_args(argv)

    utils.seed(args.seed)

    torch_settings = init_torch(
        seed=args.seed,
        cuda=args.cuda,
        workers=args.workers,
    )

    # Generate environments
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix,
    }
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(
        **model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(
        **model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    def log_git(git_args, title, failure_message, skip_empty=False):
        """Log the output of ``git <git_args>`` run inside the babyai package directory."""
        try:
            # An argv list with cwd= avoids building a shell string from a path.
            output = subprocess.check_output(
                ['git'] + git_args, cwd=babyai_code).decode('utf-8')
        except (subprocess.CalledProcessError, OSError):
            # OSError covers a missing git executable or package directory.
            logger.info(failure_message)
            return
        if skip_empty and not output:
            return
        logger.info(title)
        logger.info(output)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(
            args.model, envs[0].observation_space, args.pretrained_model)
    else:
        obss_preprocessor = utils.ObssPreprocessor(
            args.model, envs[0].observation_space, args.pretrained_model)

    # Define actor-critic model. A model previously saved under args.model is
    # deliberately not reloaded here: the model always comes from
    # args.pretrained_model or is created fresh, and it is not saved.
    if args.pretrained_model:
        acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
    else:
        acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space,
                          args.image_dim, args.memory_dim, args.instr_dim,
                          not args.no_instr, args.instr_arch,
                          not args.no_mem, args.arch)
    obss_preprocessor.vocab.save()

    if torch_settings.cuda:
        acmodel.cuda()

    # Define actor-critic algo
    def reshape_reward(_0, _1, reward, _2):
        return args.reward_scale * reward

    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(
            envs, acmodel, args.frames_per_proc, args.discount, args.lr,
            args.beta1, args.beta2, args.gae_lambda, args.entropy_coef,
            args.value_loss_coef, args.max_grad_norm, args.recurrence,
            args.optim_eps, args.clip_eps, args.ppo_epochs, args.batch_size,
            obss_preprocessor, reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When using extra binary information, more tensors (model params) are
    # initialized compared to when we don't use that. Thus, there starts to be
    # a difference in the random state. If we want to avoid it, in order to
    # make sure that the results of supervised-loss-coef=0. and
    # extra-binary-info=0 match, we need to reseed here.
    utils.seed(args.seed)

    # Restore training status. The loop below does not consume it; the read
    # is kept so an unreadable status file still fails here as before.
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

    # No CSV or TensorBoard writer is set up here; per-update metrics are
    # reported through the iteration wrapper instead.

    # Log code state, command, availability of CUDA and model
    babyai_code = list(babyai.__path__)[0]
    log_git(['log', '-n1'], 'LAST COMMIT INFO:',
            'Could not figure out the last commit')
    log_git(['diff'], 'GIT DIFF:',
            'Could not figure out the git diff', skip_empty=True)
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model
    wrapper = iteration_wrapper(
        exp,
        sync=torch_settings.sync,
        max_count=args.max_count,
        sample_duration=args.sample_duration,
    )

    # The wrapper, not a frame budget, decides when training stops.
    while True:
        with wrapper() as it:
            # Update parameters
            if wrapper.done():
                break

            logs = algo.update_parameters()

            it.set_count(logs["num_frames"])
            it.log(loss=logs["loss"], )
args.algo, args.arch, instr, mem, args.seed, fakerewardtxt, suffix) model_name = args.model or default_model_name # Define obss preprocessor obss_preprocessor = utils.ObssPreprocessor(model_name, envs[0].observation_space) # Define actor-critic model acmodel = utils.load_model(model_name, raise_not_found=False) if acmodel is None: acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space, args.instr_model, not args.no_mem, args.arch) if torch.cuda.is_available(): acmodel.cuda() # Define actor-critic algo if args.algo == "a2c": algo = torch_rl.A2CAlgo(envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.gae_tau, args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence, args.optim_alpha, args.optim_eps, obss_preprocessor, utils.reshape_reward) elif args.algo == "ppo": algo = torch_rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount, args.lr, args.gae_tau, args.entropy_coef, args.value_loss_coef, args.max_grad_norm, args.recurrence,
model_names[m] = model_names[m].format( **model_name_parts) if model_name else default_model_name loggers = [] for model_name in model_names[:-1]: loggers.append(utils_sr.configure_logging(model_name)) loggers.append(utils_sr.configure_logging(model_names[-1], stream=True)) # Define obss preprocessor. obss_preprocessor = utils_sr.MultiObssPreprocessor( model_names, [envs[0].observation_space for _ in model_names], pretrained) # Define actor--critic models. models = [] for m, model_name in enumerate(model_names): model = utils.load_model(model_name, raise_not_found=False) if model is None: if pretrained[m]: models.append(utils.load_model(pretrained[m], raise_not_found=True)) else: models.append( ACModel(obss_preprocessor.obs_spaces[m], envs[0].action_space, args.image_dim, args.memory_dim, args.instr_dim, args.enc_dim, args.dec_dim, args.len_message, args.num_symbols)) else: models.append(model) for m, model in enumerate(models): obss_preprocessor.vocabs[m].save()
def load_model(model):
    """Load a saved model together with its vocabulary.

    Args:
        model: model name, passed to ``utils.load_model`` and
            ``utils.get_vocab_path``.

    Returns:
        A ``(vocab, acmodel)`` tuple: the JSON-decoded content of the
        vocabulary file and the object returned by ``utils.load_model``.
    """
    net = utils.load_model(model)

    vocab_path = utils.get_vocab_path(model)
    with open(vocab_path) as handle:
        vocabulary = json.load(handle)

    return vocabulary, net
def __init__(self, args):
    """Set up environment, demos, the saved model and its two optimizers.

    The model named ``args.model`` must already exist (it is loaded with
    ``raise_not_found=True``). A deep copy of it, ``fast_net``, is optimized
    with SGD at ``args.update_lr``; the original ``net`` is optimized with
    Adam at ``args.meta_lr``.

    :param args: namespace with the run configuration.
    """
    super(EvalLearner, self).__init__()

    self.args = args
    self.update_lr = args.update_lr
    self.meta_lr = args.meta_lr
    self.task_num = args.task_num

    utils.seed(self.args.seed)

    self.env = gym.make(self.args.env)

    train_path = utils.get_demos_path(
        args.demos, args.env, args.demos_origin, valid=False)
    valid_path = utils.get_demos_path(
        args.demos, args.env, args.demos_origin, valid=True)

    # All training demos are kept; no args.episodes cap is applied here.
    logger.info('loading demos')
    self.train_demos = utils.load_demos(train_path)
    logger.info('loaded demos')

    # Validation demos are truncated to at most args.val_episodes entries.
    loaded_valid = utils.load_demos(valid_path)
    self.val_demos = loaded_valid[:self.args.val_episodes]

    observation_space = self.env.observation_space
    action_space = self.env.action_space

    print(args.model)

    pretrained_name = getattr(self.args, 'pretrained_model', None)
    self.obss_preprocessor = utils.ObssPreprocessor(
        args.model, observation_space, pretrained_name)

    # Define actor-critic model: it has to exist already, nothing is created
    # or saved here.
    self.net = utils.load_model(args.model, raise_not_found=True)
    self.obss_preprocessor.vocab.save()

    self.fast_net = copy.deepcopy(self.net)

    self.net.train()
    self.fast_net.train()

    cuda_available = torch.cuda.is_available()
    if cuda_available:
        self.net.cuda()
        self.fast_net.cuda()

    # SGD drives the copied network; no learning-rate scheduler is attached.
    self.optimizer = torch.optim.SGD(self.fast_net.parameters(),
                                     lr=self.args.update_lr)

    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")

    # Adam drives the original network.
    self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)
# Evaluation setup for a sender/receiver pair of saved models.
# NOTE(review): this excerpt reads `args` from an enclosing scope that is not
# visible here. Confirm where it is defined.
utils.seed(args.seed)

# One environment per process, each with its own seed derived from args.seed.
envs = []
for i in range(args.procs):
    env = gym.make(args.env)
    env.seed(100 * args.seed + i)
    envs.append(env)

penv = ParallelEnv(envs, args.n, args.conventional, args.archimedean, args.informed_sender)

# Define obss preprocessor.
# Both models are given the observation space of the first environment.
obss_preprocessor = utils_sr.MultiObssPreprocessor([args.sender, args.receiver], [envs[0].observation_space]*2)

# Define actor--critic models.
sender = utils.load_model(args.sender)
receiver = utils.load_model(args.receiver)

if torch.cuda.is_available():
    sender.cuda()
    receiver.cuda()

# Define actor--critic algorithm.
# The reward callback scales the raw reward by args.reward_scale and ignores
# its other three arguments.
reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
test_algo = TestAlgo(penv, [sender, receiver], args.frames_per_proc, args.discount, args.gae_lambda, obss_preprocessor, reshape_reward, not args.no_comm, args.conventional, not args.sample)

# Test models.
# Both models are switched to evaluation mode before the timers start.
sender.eval()
receiver.eval()
total_start_time = time.time()
update_start_time = time.time()