def main(args, seed, episodes):
    # Set seed for all randomness sources
    utils.seed(seed)

    # Define agent
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, None, None, args.argmax, args.env)

    if args.model is None and args.episodes > len(agent.demos):
        # Set the number of episodes to be the number of demos
        episodes = len(agent.demos)

    # Evaluate
    if isinstance(agent, utils.DemoAgent):
        logs = evaluate_demo_agent(agent, episodes)
    elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
        logs = evaluate(agent, env, episodes, False)
    else:
        logs = batch_evaluate(agent, args.env, seed, episodes,
                              return_obss_actions=True)

    return logs
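
# Usage sketch for main() above; not part of the original script. It assumes an
# argparse-style namespace carrying the fields main() reads (env, model, argmax,
# episodes, contiguous_episodes); the env and model names are placeholders.
from argparse import Namespace

import numpy as np

eval_args = Namespace(env="BabyAI-GoToLocal-v0", model="my_model", argmax=True,
                      episodes=100, contiguous_episodes=False)
logs = main(eval_args, seed=0, episodes=eval_args.episodes)
print("mean return: {:.3f}".format(np.mean(logs["return_per_episode"])))
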
def main_test(args, seed, episodes):
    # Set seed for all randomness sources
    utils.seed(seed)

    # Define agent on the test variant of the environment
    env_name = args.env + "_Test-v0"
    env = gym.make(env_name)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, argmax=args.argmax, env_name=env_name)

    if args.model is None and args.episodes > len(agent.demos):
        # Set the number of episodes to be the number of demos
        episodes = len(agent.demos)

    # Evaluate
    if isinstance(agent, utils.DemoAgent):
        logs = evaluate_demo_agent(agent, episodes)
    elif isinstance(agent, utils.BotAgent):
        logs = evaluate(agent, env, episodes, False)
    else:
        logs = batch_evaluate(agent, env_name, seed, episodes)

    return logs
def validate(self, episodes, verbose=True):
    # The seed needs to be reset for each validation to ensure consistency
    utils.seed(self.args.val_seed)

    if verbose:
        logger.info("Validating the model")

    if getattr(self.args, 'multi_env', None):
        agent = utils.load_agent(self.env[0], model_name=self.args.model, argmax=True)
    else:
        agent = utils.load_agent(self.env, model_name=self.args.model, argmax=True)

    # Point the agent at the current model
    agent.model = self.acmodel
    agent.model.eval()

    logs = []
    for env_name in ([self.args.env] if not getattr(self.args, 'multi_env', None)
                     else self.args.multi_env):
        logs += [batch_evaluate(agent, env_name, self.args.val_seed, episodes)]

    agent.model.train()
    return logs
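
# Usage sketch for validate(); an assumption-laden illustration where `trainer`
# stands for whatever object carries this method and 512 is an arbitrary episode
# count. The success-rate convention (return > 0) follows the other scripts here.
import numpy as np

val_logs = trainer.validate(episodes=512, verbose=False)
for env_logs in val_logs:
    success_rate = np.mean([1 if r > 0 else 0 for r in env_logs["return_per_episode"]])
    print("validation success rate: {:.3f}".format(success_rate))
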
def __init__(self, model, env, args, distill_with_teacher, reward_predictor=False):
    self.args = args
    self.distill_with_teacher = distill_with_teacher
    self.reward_predictor = reward_predictor

    utils.seed(self.args.seed)

    self.env = env
    observation_space = self.env.observation_space
    action_space = self.env.action_space

    # Define actor-critic model
    self.acmodel = model
    utils.save_model(self.acmodel, args.model)
    self.acmodel.train()
    if torch.cuda.is_available():
        self.acmodel.cuda()

    self.optimizer = torch.optim.Adam(self.acmodel.parameters(), self.args.lr,
                                      eps=self.args.optim_eps)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=100, gamma=0.9)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("DEVICE", self.device)
def __init__(self, args):
    self.args = args

    # Seeding
    utils.seed(args.seed)

    self.env = gym.make(id=args.env)
    self.episodes = 300  # args.episodes
    self.horizon = self.env.max_steps
    self.initial_decay = 0.99  # args.decay

    self.observation_preprocessor = utils.ObssPreprocessor(
        model_name=args.model,
        obs_space=self.env.observation_space,
        load_vocab_from=getattr(self.args, 'pretrained_model', None))

    # TODO: for now I am only running the small model
    self.model = models.ACModel(obs_space=self.env.observation_space,
                                action_space=self.env.action_space)
    self.learner = ModelAgent(model_or_name=self.model,
                              obss_preprocessor=self.observation_preprocessor,
                              argmax=True)
    self.teacher = Bot(self.env)
    self.data = []

    self.observation_preprocessor.vocab.save()
    utils.save_model(self.model, args.model)

    self.model.train()
    if torch.cuda.is_available():
        self.model.cuda()

    self.optimizer = torch.optim.Adam(self.model.parameters(), self.args.lr,
                                      eps=self.args.optim_eps)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=100, gamma=0.9)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if self.device.type == 'cpu':
        print('running on cpu...')
def main(args, seed, episodes):
    # Set seed for all randomness sources
    utils.seed(seed)

    # Define agent
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                             args.argmax, args.env)

    if args.model is None and args.episodes > len(agent.demos):
        # Set the number of episodes to be the number of demos
        episodes = len(agent.demos)

    if args.proj is not None:
        assert args.proj_file is not None
    if args.proj_file is not None:
        with open(args.proj_file, newline="") as reader:
            proj_sentences = reader.readlines()
    else:
        proj_sentences = None

    seeds = []
    orig_missions = []
    missions = []
    with open(args.turk_file, newline="") as reader:
        csv_reader = csv.reader(reader)
        header = next(csv_reader)
        i_seed = header.index("Input.seed")
        i_orig_dir = header.index("Input.cmd")
        i_mission = header.index("Answer.command")
        for row in csv_reader:
            seeds.append(int(row[i_seed]))
            orig_missions.append(row[i_orig_dir])
            missions.append(row[i_mission])

    if not args.human:
        logs = evaluate_fixed_seeds(agent, env, episodes, seeds, orig_missions)
    else:
        logs = evaluate_fixed_seeds(agent, env, episodes, seeds, orig_missions,
                                    missions, args.proj, proj_sentences)
    return logs
def main(args, seed, episodes):
    # Set seed for all randomness sources
    utils.seed(seed)

    # Define agent
    env = gym.make(args.env)
    env.seed(seed)
    agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                             args.argmax, args.env)

    if args.model is None and args.episodes > len(agent.demos):
        # Set the number of episodes to be the number of demos
        episodes = len(agent.demos)

    # Evaluate
    if isinstance(agent, utils.ModelAgent) and not args.contiguous_episodes:
        logs = batch_evaluate(agent, args.env, seed, episodes)
    else:
        logs = evaluate(agent, env, episodes, False)

    return logs
action_map = {
    "LEFT": "left",
    "RIGHT": "right",
    "UP": "forward",
    "PAGE_UP": "pickup",
    "PAGE_DOWN": "drop",
    "SPACE": "toggle",
}

if args.seed is None:
    args.seed = 0 if args.model is not None else 1

# Set seed for all randomness sources
utils.seed(args.seed)

# Generate environment
env = gym.make(args.env)
env.seed(args.seed)
for _ in range(args.shift):
    env.reset()

global obs
obs = env.reset()
print("Mission: {}".format(obs["mission"]))

# Define agent and load trained model
agent = machine.util.load_agent(env, args.model, args.argmax, args.env, args.vocab)
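
# Hypothetical key-handler sketch showing how action_map above could be
# consumed. The MiniGrid-style `env.actions` enum lookup and the handler
# signature are assumptions, not code from this script.
def key_down_callback(key_name):
    global obs
    if key_name not in action_map:
        return
    action = env.actions[action_map[key_name]]  # e.g. "UP" -> Actions.forward
    obs, reward, done, _ = env.step(action)
    if done:
        obs = env.reset()
        print("Mission: {}".format(obs["mission"]))
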
def main():
    # Generate environments
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix}
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(
        **model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(**model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(args.model,
                                                      envs[0].observation_space,
                                                      args.pretrained_model)
    else:
        # obss_preprocessor = utils.ObssPreprocessor(args.model,
        #                                            envs[0].observation_space,
        #                                            args.pretrained_model)
        obss_preprocessor = utils.ImgInstrObssPreprocessor(args.model,
                                                           envs[0].observation_space)

    # Define actor-critic model
    acmodel = utils.load_model(args.model, raise_not_found=False)
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
        else:
            # acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space,
            #                   args.image_dim, args.memory_dim, args.instr_dim,
            #                   not args.no_instr, args.instr_arch, not args.no_mem,
            #                   args.arch)
            acmodel = ACModelImgInstr(obss_preprocessor.obs_space, envs[0].action_space,
                                      args.image_dim, args.memory_dim, args.instr_dim,
                                      not args.no_instr, not args.no_mem, args.arch)

    # obss_preprocessor.vocab.save()
    utils.save_model(acmodel, args.model)

    if torch.cuda.is_available():
        acmodel.cuda()

    # Define actor-critic algo
    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(envs, acmodel, args.frames_per_proc, args.discount,
                                 args.lr, args.beta1, args.beta2, args.gae_lambda,
                                 args.entropy_coef, args.value_loss_coef,
                                 args.max_grad_norm, args.recurrence, args.optim_eps,
                                 args.clip_eps, args.ppo_epochs, args.batch_size,
                                 obss_preprocessor, reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When using extra binary information, more tensors (model params) are initialized
    # than when we don't use it, so the random state starts to differ. To make sure that
    # the results of supervised-loss-coef=0 and extra-binary-info=0 match, we reseed here.
    utils.seed(args.seed)

    # Restore training status
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

    # Define logger and Tensorboard writer and CSV writer
    header = (["update", "episodes", "frames", "FPS", "duration"]
              + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["success_rate"]
              + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
              + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # We don't buffer data going into the CSV log, because we assume
    # that one update takes much longer than one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log code state, command, availability of CUDA and model
    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            'cd {}; git log -n1'.format(babyai_code), shell=True).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output(
            'cd {}; git diff'.format(babyai_code), shell=True).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the git diff')
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model
    total_start_time = time.time()
    best_success_rate = 0
    best_mean_return = 0
    test_env_name = args.env
    while status['num_frames'] < args.frames:
        # Update parameters
        update_start_time = time.time()
        logs = algo.update_parameters()
        update_end_time = time.time()

        status['num_frames'] += logs["num_frames"]
        status['num_episodes'] += logs['episodes_done']
        status['i'] += 1

        # Print logs
        if status['i'] % args.log_interval == 0:
            total_elapsed_time = int(time.time() - total_start_time)
            fps = logs["num_frames"] / (update_end_time - update_start_time)
            duration = datetime.timedelta(seconds=total_elapsed_time)
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])
            num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

            data = [status['i'], status['num_episodes'], status['num_frames'],
                    fps, total_elapsed_time,
                    *return_per_episode.values(),
                    success_per_episode['mean'],
                    *num_frames_per_episode.values(),
                    logs["entropy"], logs["value"], logs["policy_loss"],
                    logs["value_loss"], logs["loss"], logs["grad_norm"]]

            format_str = ("U {} | E {} | F {:06} | FPS {:04.0f} | D {} "
                          "| R:xsmM {: .2f} {: .2f} {: .2f} {: .2f} | "
                          "S {:.2f} | F:xsmM {:.1f} {:.1f} {} {} | H {:.3f} | V {:.3f} | "
                          "pL {: .3f} | vL {:.3f} | L {:.3f} | gN {:.3f} | ")
            logger.info(format_str.format(*data))
            if args.tb:
                assert len(header) == len(data)
                for key, value in zip(header, data):
                    writer.add_scalar(key, float(value), status['num_frames'])
            csv_writer.writerow(data)

        # Save obss preprocessor vocabulary and model
        if args.save_interval > 0 and status['i'] % args.save_interval == 0:
            # obss_preprocessor.vocab.save()
            with open(status_path, 'w') as dst:
                json.dump(status, dst)
            utils.save_model(acmodel, args.model)

            # Test the model before saving
            agent = ModelAgent(args.model, obss_preprocessor, argmax=True)
            agent.model = acmodel
            agent.model.eval()
            logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
            agent.model.train()

            mean_return = np.mean(logs["return_per_episode"])
            success_rate = np.mean(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])
            save_model = False
            if success_rate > best_success_rate:
                best_success_rate = success_rate
                save_model = True
            elif (success_rate == best_success_rate) and (mean_return > best_mean_return):
                best_mean_return = mean_return
                save_model = True
            if save_model:
                utils.save_model(acmodel, args.model + '_best')
                # obss_preprocessor.vocab.save(utils.get_vocab_path(args.model + '_best'))
                logger.info("Return {: .2f}; best model is saved".format(mean_return))
            else:
                logger.info("Return {: .2f}; not the best model; not saved".format(mean_return))
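
# Usage sketch (assumptions flagged): reload the '<model>_best' checkpoint that
# the loop above saves and re-run the same batch evaluation. The names
# utils.load_model, ModelAgent, batch_evaluate, and test_env_name all come from
# the training script above; whether load_model resolves the '_best' suffix this
# way is an assumption.
best_model = utils.load_model(args.model + '_best')
agent = ModelAgent(args.model + '_best', obss_preprocessor, argmax=True)
agent.model = best_model
agent.model.eval()
best_logs = batch_evaluate(agent, test_env_name, args.val_seed, args.val_episodes)
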
def __init__(self, args):
    self.args = args

    utils.seed(self.args.seed)

    # args.env is a list when training on multiple environments
    if getattr(args, 'multi_env', None):
        self.env = [gym.make(item) for item in args.multi_env]

        self.train_demos = []
        for demos, episodes in zip(args.multi_demos, args.multi_episodes):
            demos_path = utils.get_demos_path(demos, None, None, valid=False)
            logger.info('loading {} of {} demos'.format(episodes, demos))
            train_demos = utils.load_demos(demos_path)
            logger.info('loaded demos')
            if episodes > len(train_demos):
                raise ValueError("there are only {} train demos in {}".format(
                    len(train_demos), demos))
            self.train_demos.extend(train_demos[:episodes])
            logger.info('So far, {} demos loaded'.format(len(self.train_demos)))

        self.val_demos = []
        for demos, episodes in zip(args.multi_demos,
                                   [args.val_episodes] * len(args.multi_demos)):
            demos_path_valid = utils.get_demos_path(demos, None, None, valid=True)
            logger.info('loading {} of {} valid demos'.format(episodes, demos))
            valid_demos = utils.load_demos(demos_path_valid)
            logger.info('loaded demos')
            if episodes > len(valid_demos):
                logger.info('Using all the available {} demos to evaluate valid. accuracy'
                            .format(len(valid_demos)))
            self.val_demos.extend(valid_demos[:episodes])
            logger.info('So far, {} valid demos loaded'.format(len(self.val_demos)))

        logger.info('Loaded all demos')

        observation_space = self.env[0].observation_space
        action_space = self.env[0].action_space
    else:
        self.env = gym.make(self.args.env)

        demos_path = utils.get_demos_path(args.demos, args.env, args.demos_origin,
                                          valid=False)
        demos_path_valid = utils.get_demos_path(args.demos, args.env,
                                                args.demos_origin, valid=True)

        logger.info('loading demos')
        self.train_demos = utils.load_demos(demos_path)
        logger.info('loaded {} demos'.format(len(self.train_demos)))
        if args.episodes:
            if args.episodes > len(self.train_demos):
                raise ValueError("there are only {} train demos".format(
                    len(self.train_demos)))
            self.train_demos = self.train_demos[:args.episodes]

        self.val_demos = utils.load_demos(demos_path_valid)
        if args.val_episodes > len(self.val_demos):
            logger.info('Using all the available {} demos to evaluate valid. accuracy'
                        .format(len(self.val_demos)))
        self.val_demos = self.val_demos[:self.args.val_episodes]

        observation_space = self.env.observation_space
        action_space = self.env.action_space

    self.obss_preprocessor = utils.ObssPreprocessor(
        args.model, observation_space,
        getattr(self.args, 'pretrained_model', None))

    # Define actor-critic model
    self.acmodel = utils.load_model(args.model, raise_not_found=False)
    if self.acmodel is None:
        if getattr(self.args, 'pretrained_model', None):
            self.acmodel = utils.load_model(args.pretrained_model,
                                            raise_not_found=True)
        else:
            self.acmodel = ACModel(self.obss_preprocessor.obs_space, action_space,
                                   args.image_dim, args.memory_dim, args.instr_dim,
                                   not self.args.no_instr, self.args.instr_arch,
                                   not self.args.no_mem, self.args.arch)

    self.obss_preprocessor.vocab.save()
    utils.save_model(self.acmodel, args.model)

    self.acmodel.train()
    if torch.cuda.is_available():
        self.acmodel.cuda()

    self.optimizer = torch.optim.Adam(self.acmodel.parameters(), self.args.lr,
                                      eps=self.args.optim_eps)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=100, gamma=0.9)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def generate_demos(n_episodes, valid, seed, shift=0):
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    env.seed(seed)
    for i in range(shift):
        env.reset()

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    checkpoint_time = time.time()
    while True:
        # Run the expert for one episode
        done = False
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])

                obs = new_obs
            if reward > 0 and (args.filter_steps == 0 or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                if len(demos) >= n_episodes:
                    break
            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception("mission failed")
                logger.info("mission failed")
        except Exception:
            if args.on_exception == 'crash':
                raise
            logger.exception("error while generating demo #{}".format(len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info("demo #{}, {:.3f} demos per second, {:.3f} seconds to go".format(
                len(demos), demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations
        if (args.save_interval > 0 and len(demos) < n_episodes
                and len(demos) % args.save_interval == 0):
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("Demos saved")
            # Print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("Demos saved")
    print_demo_lengths(demos[-100:])
def main(exp, argv):
    os.environ["BABYAI_STORAGE"] = exp.results_directory()

    # Parse arguments
    parser = ArgumentParser()
    parser.add_argument("--algo", default='ppo',
                        help="algorithm to use (default: ppo)")
    parser.add_argument("--discount", type=float, default=0.99,
                        help="discount factor (default: 0.99)")
    parser.add_argument("--reward-scale", type=float, default=20.,
                        help="Reward scale multiplier")
    parser.add_argument("--gae-lambda", type=float, default=0.99,
                        help="lambda coefficient in GAE formula (default: 0.99, 1 means no GAE)")
    parser.add_argument("--value-loss-coef", type=float, default=0.5,
                        help="value loss term coefficient (default: 0.5)")
    parser.add_argument("--max-grad-norm", type=float, default=0.5,
                        help="maximum norm of gradient (default: 0.5)")
    parser.add_argument("--clip-eps", type=float, default=0.2,
                        help="clipping epsilon for PPO (default: 0.2)")
    parser.add_argument("--ppo-epochs", type=int, default=4,
                        help="number of epochs for PPO (default: 4)")
    parser.add_argument("--save-interval", type=int, default=50,
                        help="number of updates between two saves (default: 50, 0 means no saving)")
    parser.add_argument("--workers", type=int, default=8,
                        help="number of workers for PyTorch (default: 8)")
    parser.add_argument("--max-count", type=int, default=1000,
                        help="maximum number of frames to run for")
    parser.add_argument("--sample_duration", type=float, default=0.5,
                        help="sampling duration")
    parser.add_argument("--cuda", action="store_true", default=False,
                        help="whether to use cuda")

    args = parser.parse_args(argv)

    utils.seed(args.seed)

    torch_settings = init_torch(
        seed=args.seed,
        cuda=args.cuda,
        workers=args.workers,
    )

    # Generate environments
    envs = []
    for i in range(args.procs):
        env = gym.make(args.env)
        env.seed(100 * args.seed + i)
        envs.append(env)

    # Define model name
    suffix = datetime.datetime.now().strftime("%y-%m-%d-%H-%M-%S")
    instr = args.instr_arch if args.instr_arch else "noinstr"
    mem = "mem" if not args.no_mem else "nomem"
    model_name_parts = {
        'env': args.env,
        'algo': args.algo,
        'arch': args.arch,
        'instr': instr,
        'mem': mem,
        'seed': args.seed,
        'info': '',
        'coef': '',
        'suffix': suffix
    }
    default_model_name = "{env}_{algo}_{arch}_{instr}_{mem}_seed{seed}{info}{coef}_{suffix}".format(
        **model_name_parts)
    if args.pretrained_model:
        default_model_name = args.pretrained_model + '_pretrained_' + default_model_name
    args.model = args.model.format(**model_name_parts) if args.model else default_model_name

    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    # Define obss preprocessor
    if 'emb' in args.arch:
        obss_preprocessor = utils.IntObssPreprocessor(args.model,
                                                      envs[0].observation_space,
                                                      args.pretrained_model)
    else:
        obss_preprocessor = utils.ObssPreprocessor(args.model,
                                                   envs[0].observation_space,
                                                   args.pretrained_model)

    # Define actor-critic model
    # acmodel = utils.load_model(args.model, raise_not_found=False)
    acmodel = None
    if acmodel is None:
        if args.pretrained_model:
            acmodel = utils.load_model(args.pretrained_model, raise_not_found=True)
        else:
            acmodel = ACModel(obss_preprocessor.obs_space, envs[0].action_space,
                              args.image_dim, args.memory_dim, args.instr_dim,
                              not args.no_instr, args.instr_arch, not args.no_mem,
                              args.arch)

    obss_preprocessor.vocab.save()
    # utils.save_model(acmodel, args.model)

    if torch_settings.cuda:
        acmodel.cuda()

    # Define actor-critic algo
    reshape_reward = lambda _0, _1, reward, _2: args.reward_scale * reward
    if args.algo == "ppo":
        algo = babyai.rl.PPOAlgo(
            envs, acmodel, args.frames_per_proc, args.discount, args.lr,
            args.beta1, args.beta2,
            args.gae_lambda, args.entropy_coef, args.value_loss_coef,
            args.max_grad_norm, args.recurrence, args.optim_eps, args.clip_eps,
            args.ppo_epochs, args.batch_size, obss_preprocessor, reshape_reward)
    else:
        raise ValueError("Incorrect algorithm name: {}".format(args.algo))

    # When using extra binary information, more tensors (model params) are initialized
    # than when we don't use it, so the random state starts to differ. To make sure that
    # the results of supervised-loss-coef=0 and extra-binary-info=0 match, we reseed here.
    utils.seed(args.seed)

    # Restore training status
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
    else:
        status = {'i': 0, 'num_episodes': 0, 'num_frames': 0}

    # # Define logger and Tensorboard writer and CSV writer
    # header = (["update", "episodes", "frames", "FPS", "duration"]
    #           + ["return_" + stat for stat in ['mean', 'std', 'min', 'max']]
    #           + ["success_rate"]
    #           + ["num_frames_" + stat for stat in ['mean', 'std', 'min', 'max']]
    #           + ["entropy", "value", "policy_loss", "value_loss", "loss", "grad_norm"])
    # if args.tb:
    #     from tensorboardX import SummaryWriter
    #     writer = SummaryWriter(utils.get_log_dir(args.model))
    # csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    # first_created = not os.path.exists(csv_path)
    # # we don't buffer data going into the csv log, because we assume
    # # that one update will take much longer than one write to the log
    # csv_writer = csv.writer(open(csv_path, 'a', 1))
    # if first_created:
    #     csv_writer.writerow(header)

    # Log code state, command, availability of CUDA and model
    babyai_code = list(babyai.__path__)[0]
    try:
        last_commit = subprocess.check_output(
            'cd {}; git log -n1'.format(babyai_code), shell=True).decode('utf-8')
        logger.info('LAST COMMIT INFO:')
        logger.info(last_commit)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the last commit')
    try:
        diff = subprocess.check_output('cd {}; git diff'.format(babyai_code),
                                       shell=True).decode('utf-8')
        if diff:
            logger.info('GIT DIFF:')
            logger.info(diff)
    except subprocess.CalledProcessError:
        logger.info('Could not figure out the git diff')
    logger.info('COMMAND LINE ARGS:')
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(acmodel)

    # Train model
    total_start_time = time.time()
    best_success_rate = 0
    best_mean_return = 0
    test_env_name = args.env

    wrapper = iteration_wrapper(
        exp,
        sync=torch_settings.sync,
        max_count=args.max_count,
        sample_duration=args.sample_duration,
    )

    # while status['num_frames'] < args.frames:
    while True:
        with wrapper() as it:
            if wrapper.done():
                break

            # Update parameters
            update_start_time = time.time()
            logs = algo.update_parameters()
            update_end_time = time.time()

            it.set_count(logs["num_frames"])
            it.log(loss=logs["loss"])
def main(args, seed, episodes):
    # Set seed for all randomness sources
    utils.seed(seed)

    # Keep track of results per task
    results = {}
    for env_name in args.env:
        start_time = time.time()
        env = gym.make(env_name)
        env.seed(seed)

        # Define agent
        agent = utils.load_agent(env, args.model, args.demos, args.demos_origin,
                                 args.argmax, env_name, model_path=args.model_path)

        if args.model is None and args.episodes > len(agent.demos):
            # Set the number of episodes to be the number of demos
            episodes = len(agent.demos)

        # Evaluate
        if isinstance(agent, utils.DemoAgent):
            logs = evaluate_demo_agent(agent, episodes)
        elif isinstance(agent, utils.BotAgent) or args.contiguous_episodes:
            logs = evaluate(agent, env, episodes, False)
        else:
            logs = batch_evaluate(agent, env_name, seed, episodes)
        end_time = time.time()

        # Print logs
        num_frames = sum(logs["num_frames_per_episode"])
        fps = num_frames / (end_time - start_time)
        elapsed_time = int(end_time - start_time)
        duration = datetime.timedelta(seconds=elapsed_time)

        if args.model is not None:
            return_per_episode = utils.synthesize(logs["return_per_episode"])
            success_per_episode = utils.synthesize(
                [1 if r > 0 else 0 for r in logs["return_per_episode"]])

        num_frames_per_episode = utils.synthesize(logs["num_frames_per_episode"])

        if args.model is not None:
            print("F {} | FPS {:.0f} | D {} | R:xsmM {:.3f} {:.3f} {:.3f} {:.3f} "
                  "| S {:.3f} | F:xsmM {:.1f} {:.1f} {} {}"
                  .format(num_frames, fps, duration,
                          *return_per_episode.values(),
                          success_per_episode['mean'],
                          *num_frames_per_episode.values()))
        else:
            print("F {} | FPS {:.0f} | D {} | F:xsmM {:.1f} {:.1f} {} {}".format(
                num_frames, fps, duration, *num_frames_per_episode.values()))

        indexes = sorted(range(len(logs["num_frames_per_episode"])),
                         key=lambda k: -logs["num_frames_per_episode"][k])

        n = args.worst_episodes_to_show
        if n > 0:
            print("{} worst episodes:".format(n))
            for i in indexes[:n]:
                if 'seed_per_episode' in logs:
                    print(logs['seed_per_episode'][i])
                if args.model is not None:
                    print("- episode {}: R={}, F={}".format(
                        i, logs["return_per_episode"][i],
                        logs["num_frames_per_episode"][i]))
                else:
                    print("- episode {}: F={}".format(
                        i, logs["num_frames_per_episode"][i]))

        # Store the synthesized results for this env
        if args.model is not None:
            logs['return_per_episode'] = return_per_episode
            logs['success_per_episode'] = success_per_episode
        logs['num_frames_per_episode'] = num_frames_per_episode
        results[env_name] = logs

    return results
def __init__(self, args):
    """
    :param args:
    """
    super(MetaLearner, self).__init__()

    self.update_lr = args.update_lr
    self.meta_lr = args.meta_lr
    self.task_num = args.task_num
    self.args = args

    utils.seed(self.args.seed)

    self.env = gym.make(self.args.env)

    demos_path = utils.get_demos_path(args.demos, args.env, args.demos_origin,
                                      valid=False)
    demos_path_valid = utils.get_demos_path(args.demos, args.env,
                                            args.demos_origin, valid=True)

    logger.info('loading demos')
    self.train_demos = utils.load_demos(demos_path)
    logger.info('loaded demos')
    # if args.episodes:
    #     if args.episodes > len(self.train_demos):
    #         raise ValueError("there are only {} train demos".format(len(self.train_demos)))
    #     self.train_demos = self.train_demos[:args.episodes]

    self.val_demos = utils.load_demos(demos_path_valid)
    # if args.val_episodes > len(self.val_demos):
    #     logger.info('Using all the available {} demos to evaluate valid. accuracy'.format(len(self.val_demos)))
    self.val_demos = self.val_demos[:self.args.val_episodes]

    observation_space = self.env.observation_space
    action_space = self.env.action_space

    self.obss_preprocessor = utils.ObssPreprocessor(
        args.model, observation_space,
        getattr(self.args, 'pretrained_model', None))

    # Define actor-critic model
    # self.net = utils.load_model(args.model, raise_not_found=False)
    # if self.net is None:
    #     if getattr(self.args, 'pretrained_model', None):
    #         self.net = utils.load_model(args.pretrained_model, raise_not_found=True)
    #     else:
    self.net = ACModel(self.obss_preprocessor.obs_space, action_space,
                       args.image_dim, args.memory_dim, args.instr_dim,
                       not self.args.no_instr, self.args.instr_arch,
                       not self.args.no_mem, self.args.arch)
    self.obss_preprocessor.vocab.save()
    # utils.save_model(self.net, args.model)

    # The fast net is the per-task copy updated in the inner loop
    self.fast_net = copy.deepcopy(self.net)

    self.net.train()
    self.fast_net.train()
    if torch.cuda.is_available():
        self.net.cuda()
        self.fast_net.cuda()

    self.optimizer = torch.optim.SGD(self.fast_net.parameters(),
                                     lr=self.args.update_lr)
    # self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=100, gamma=0.9)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)
def generate_demos(n_episodes, valid, seed, shift=0):
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    use_pixels = args.pixels
    if use_pixels:
        env = RGBImgPartialObsWrapper(env)

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    checkpoint_time = time.time()
    just_crashed = False
    while True:
        if len(demos) == n_episodes:
            break

        done = False
        if just_crashed:
            logger.info("reset the environment to find a mission that the bot can solve")
            env.reset()
        else:
            env.seed(seed + len(demos))
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                if use_pixels:
                    # Pixel observations carry no compass direction
                    directions.append(None)
                else:
                    directions.append(obs['direction'])

                obs = new_obs
            if reward > 0 and (args.filter_steps == 0 or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                just_crashed = False
            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception("mission failed, the seed is {}".format(
                        seed + len(demos)))
                just_crashed = True
                logger.info("mission failed")
        except (Exception, AssertionError):
            if args.on_exception == 'crash':
                raise
            just_crashed = True
            logger.exception("error while generating demo #{}".format(len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info("demo #{}, {:.3f} demos per second, {:.3f} seconds to go".format(
                len(demos) - 1, demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations
        if (args.save_interval > 0 and len(demos) < n_episodes
                and len(demos) % args.save_interval == 0):
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("{} demos saved".format(len(demos)))
            # Print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("{} demos saved".format(len(demos)))
    print_demo_lengths(demos[-100:])
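
# Read-back sketch for the demos saved above, based on the tuple layout
# (mission, blosc-packed image array, directions, actions) used by
# generate_demos; `demos_path` is assumed to point at the file it wrote.
import blosc
import numpy as np

demos = utils.load_demos(demos_path)
mission, packed_images, directions, actions = demos[0]
images = blosc.unpack_array(packed_images)  # one stacked array of per-step observations
print(mission, images.shape, len(actions))
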