def main(args):
    """Train an imitation-learning agent and log progress to CSV/TensorBoard.

    Sets up logging, builds the metric header (extended for multi-env
    validation and optional extra metrics), opens the CSV log, and hands
    off to ``ImitationLearning.train``.

    Args:
        args: parsed command-line namespace (model name, env/demo options,
            logging flags, ...).
    """
    # Verify the arguments when we train on multiple environments.
    # No need to check len(args.multi_env) itself in case, for some reason,
    # we need to validate on other envs.
    if args.multi_env is not None:
        assert len(args.multi_demos) == len(args.multi_episodes)

    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    logger = logging.getLogger(__name__)

    il_learn = ImitationLearning(args)

    # Build the CSV/TensorBoard metric header.
    header = ["update", "frames", "FPS", "duration", "entropy",
              "policy_loss", "train_accuracy", "validation_accuracy"]
    if args.multi_env is None:
        header.extend(["validation_return", "validation_success_rate"])
    else:
        # One return/success-rate column per validation environment.
        header.extend("validation_return_{}".format(env)
                      for env in args.multi_env)
        header.extend("validation_success_rate_{}".format(env)
                      for env in args.multi_env)
    if args.weigh_corrections:
        header.append("correction_weight_loss")
    if args.compute_cic:
        header.append("val_cic")

    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer.
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # We don't buffer data going in the csv log ('1' = line buffering), cause
    # we assume that one update will take much longer than one write to the log.
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Get the status path used by train() to persist progress.
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')

    # Log command, availability of CUDA, and model.
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.model)

    il_learn.train(il_learn.train_demos, writer, csv_writer, status_path, header)
def main(args):
    """Train an IL agent in phases, growing the demo set until success.

    Each phase (optionally) trains a fresh model on the current demo set,
    validates it, and either stops at >= 99% success rate or grows the
    training set (optionally via DAgger) before the next phase.

    Args:
        args: parsed command-line namespace (env, seeds, phase counts,
            dagger/finetune flags, ...).
    """
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)

    il_learn = ImitationLearning(args)

    # Define logger and Tensorboard writer.
    header = (["update", "frames", "FPS", "duration", "entropy",
               "policy_loss", "train_accuracy"]
              + ["validation_accuracy", "validation_return",
                 "validation_success_rate"])
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer.
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # We don't buffer data going in the csv log ('1' = line buffering), cause
    # we assume that one update will take much longer than one write to the log.
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Get the status path used by train() to persist progress.
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')

    # Log command, availability of CUDA, and model.
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.acmodel)

    train_demos = []

    # Generate the initial set of training demos (with DAgger we may instead
    # start from an empty set unless explicitly seeded with bot demos).
    if not args.dagger or args.dagger_start_with_bot_demos:
        train_demos += generate_demos(
            args.env, range(args.seed, args.seed + args.start_demos))

    # Seed at which evaluation will begin.
    eval_seed = args.seed + args.start_demos

    model_name = args.model

    if args.dagger:
        # Reference episode length of the bot, used to bound DAgger rollouts.
        mean_steps = get_bot_mean(
            args.env, args.episodes_to_evaluate_mean, args.seed)
    else:
        mean_steps = None

    for phase_no in range(args.phases):
        logger.info("Starting phase {} with {} demos".format(
            phase_no, len(train_demos)))

        if not args.finetune:
            # Create a new model to be trained from scratch.
            # Fixed: use the module logger (was root `logging.info`) for
            # consistency with the rest of this function.
            logger.info("Creating new model to be trained from scratch")
            args.model = model_name + ('_phase_%d' % phase_no)
            il_learn = ImitationLearning(args)

        # Train the imitation learning agent.
        if len(train_demos) > 0:
            il_learn.train(train_demos, writer, csv_writer, status_path,
                           header, reset_status=True)

        # Stopping criterion: fraction of validation episodes with positive
        # return.
        valid_log = il_learn.validate(args.val_episodes)
        success_rate = np.mean(
            [1 if r > 0 else 0 for r in valid_log[0]['return_per_episode']])

        if success_rate >= 0.99:
            logger.info("Reached target success rate with {} demos, stopping"
                        .format(len(train_demos)))
            break

        # Grow the training set (bot demos or DAgger corrections) and advance
        # the evaluation seed past the episodes just consumed.
        eval_seed = grow_training_set(
            il_learn, train_demos, eval_seed, args.demo_grow_factor,
            args.num_eval_demos, args.dagger, mean_steps)
def main(args):
    """Resumable phased IL training: grow demos each phase, persist status.

    Like the basic phased trainer, but the demo-generation seed and current
    phase are checkpointed to ``status.json`` so an interrupted run can
    resume, and each phase trains a per-phase model from scratch. The final
    demo set is saved to disk.

    Args:
        args: parsed command-line namespace (env, seeds, phases, demo
            options, ...).
    """
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)

    il_learn = ImitationLearning(args)

    # Define logger and Tensorboard writer.
    header = ([
        "update", "frames", "FPS", "duration", "entropy", "policy_loss",
        "train_accuracy"
    ] + [
        "validation_accuracy", "validation_return", "validation_success_rate"
    ])
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer.
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # We don't buffer data going in the csv log ('1' = line buffering), cause
    # we assume that one update will take much longer than one write to the log.
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log command, availability of CUDA, and model.
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.acmodel)

    # Seed at which demo evaluation/generation will begin.
    eval_seed = args.seed + len(il_learn.train_demos)
    # Phase at which we start.
    cur_phase = 0

    # Try to load the status (if resuming).
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
        eval_seed = status.get('eval_seed', eval_seed)
        cur_phase = status.get('cur_phase', cur_phase)

    model_name = args.model

    for phase_no in range(cur_phase, args.phases):
        logger.info("Starting phase {} with {} demos, eval_seed={}".format(
            phase_no, len(il_learn.train_demos), eval_seed))

        # Each phase trains a different model from scratch.
        args.model = model_name + ('_phase_%d' % phase_no)
        il_learn = ImitationLearning(args)

        # Train the imitation learning agent; per-phase status lives in the
        # per-phase model directory.
        if len(il_learn.train_demos) > 0:
            train_status_path = os.path.join(utils.get_log_dir(args.model),
                                             'status.json')
            il_learn.train(il_learn.train_demos, writer, csv_writer,
                           train_status_path, header)

        # Stopping criterion: fraction of validation episodes with positive
        # return.
        valid_log = il_learn.validate(args.val_episodes)
        success_rate = np.mean(
            [1 if r > 0 else 0 for r in valid_log[0]['return_per_episode']])

        if success_rate >= 0.99:
            logger.info(
                "Reached target success rate with {} demos, stopping".format(
                    len(il_learn.train_demos)))
            break

        eval_seed = grow_training_set(il_learn, il_learn.train_demos,
                                      eval_seed, args.demo_grow_factor,
                                      args.num_eval_demos)

        # Save the current demo generation seed (and next phase) so the run
        # can resume from here.
        with open(status_path, 'w') as dst:
            status = {'eval_seed': eval_seed, 'cur_phase': phase_no + 1}
            json.dump(status, dst)

    # Save the demos.
    demos_path = utils.get_demos_path(args.demos, args.env, args.demos_origin,
                                      valid=False)
    # Fixed: use the logger (was a bare print) for consistency with the rest
    # of this function's output.
    logger.info("saving demos to: %s", demos_path)
    utils.save_demos(il_learn.train_demos, demos_path)
default=1, help="number of epochs between two validation checks (default: 1)") parser.add_argument( "--val-episodes", type=int, default=500, help="number of episodes used to evaluate the agent, and to evaluate v") if __name__ == '__main__': args = parser.parse_args() torch.manual_seed(222) torch.cuda.manual_seed_all(222) np.random.seed(222) args.model = args.model or ImitationLearning.default_model_name(args) utils.configure_logging(args.model) logger = logging.getLogger(__name__) device = torch.device('cuda') maml = EvalLearner(args).to(device) tmp = filter(lambda x: x.requires_grad, maml.parameters()) num = sum(map(lambda x: np.prod(x.shape), tmp)) print(maml) print('Total trainable tensors:', num) logs = maml.validate(maml.val_demos) H = sum([log['entropy'] for log in logs]) / float(len(logs)) PL = sum([log['policy_loss'] for log in logs]) / float(len(logs))