def train(self, args, callback, env_kwargs=None, train_kwargs=None):
    if train_kwargs is None:
        train_kwargs = {}

    if args.srl_model == "raw_pixels":
        printYellow("Warning: ACKTR can have memory issues when running with raw_pixels")

    param_kwargs = {
        "verbose": 1,
        "n_steps": 5,
        "vf_coef": 0.5,
        "ent_coef": 0.01,
        "max_grad_norm": 0.5,
        "learning_rate": 7e-4,
        "vf_fisher_coef": 1.0,
        "gamma": 0.99,
        "lr_schedule": args.lr_schedule
    }

    super().train(args, callback, env_kwargs, {**param_kwargs, **train_kwargs})
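# A minimal, self-contained illustration of the kwargs merge used above: because
# ``train_kwargs`` comes last in the dict literal, user-supplied values override
# the ACKTR defaults. The values below are toy numbers, not the real defaults.
defaults = {"ent_coef": 0.01, "n_steps": 5}
overrides = {"ent_coef": 0.001}
merged = {**defaults, **overrides}
assert merged == {"ent_coef": 0.001, "n_steps": 5}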
def allPolicyFiles(log_dir):
    """
    Retrieve the saved model checkpoints of a log folder, sorted by episode

    :param log_dir: (str) path to the log folder
    :return: (np.ndarray, np.ndarray) the episode numbers and the corresponding model folders
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(log_dir)
    printYellow(log_dir)
    files = glob.glob(log_dir + '/model_*')

    files_list = []
    for file in files:
        eps = int(file.split('_')[-1])
        files_list.append((eps, file + '/'))

    def sortFirst(val):
        """
        :param val: (tuple) (episode, model folder)
        :return: (int) the episode number
        """
        return val[0]

    files_list.sort(key=sortFirst)
    res = np.array(files_list)
    return res[:, 0], res[:, 1]
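# Hedged usage sketch: the log folder below is a placeholder for a real RL run
# (it must contain the config files that loadConfigAndSetup expects).
episodes, model_paths = allPolicyFiles("logs/OmnirobotEnv-v0/ground_truth/ppo2/19-01-01_12h00_00/")
for ep, path in zip(episodes, model_paths):
    print("checkpoint at episode {} -> {}".format(ep, path))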
def configureEnvAndLogFolder(args, env_kwargs, all_models):
    """
    :param args: (ArgumentParser object)
    :param env_kwargs: (dict) The extra arguments for the environment
    :param all_models: (dict) The location of all the trained SRL models
    :return: (ArgumentParser object, dict)
    """
    global PLOT_TITLE, LOG_DIR
    # Reward sparse or shaped
    env_kwargs["shape_reward"] = args.shape_reward
    # Actions in joint space or relative position space
    env_kwargs["action_joints"] = args.action_joints
    args.log_dir += args.env + "/"

    models = all_models[args.env]  # models: dict of SRL model paths from the config file
    PLOT_TITLE = args.srl_model
    path = models.get(args.srl_model)
    args.log_dir += args.srl_model + "/"

    env_kwargs["srl_model"] = args.srl_model
    if registered_srl[args.srl_model][0] == SRLType.SRL:
        env_kwargs["use_srl"] = True
        if args.latest:
            printYellow("Using latest srl model in {}".format(models['log_folder']))
            env_kwargs["srl_model_path"] = latestPath(models['log_folder'])
        else:
            assert path is not None, "Error: SRL path not defined for {} in {}".format(args.srl_model,
                                                                                       args.srl_config_file)
            # Path depending on whether to load the latest model or not
            if args.srl_model_path is not None:
                # HACK: pass the SRL model weights path from the command line
                assert os.path.exists(args.srl_model_path), \
                    "SRL model weights: {} doesn't exist.".format(args.srl_model_path)
                env_kwargs["srl_model_path"] = args.srl_model_path
            else:
                srl_model_path = models['log_folder'] + path
                env_kwargs["srl_model_path"] = srl_model_path

    # Use of continual learning env
    env_kwargs["simple_continual_target"] = args.simple_continual
    env_kwargs["circular_continual_move"] = args.circular_continual
    env_kwargs["square_continual_move"] = args.square_continual
    env_kwargs["eight_continual_move"] = args.eight_continual

    # Add date + current time
    args.log_dir += "{}/{}/".format(ALGO_NAME, datetime.now().strftime("%y-%m-%d_%Hh%M_%S"))
    LOG_DIR = args.log_dir
    # wait one second if the folder exists, to avoid overwriting logs
    time.sleep(1)
    os.makedirs(args.log_dir, exist_ok=True)

    return args, env_kwargs
def loadRunningAverage(envs, load_path_normalise=None):
    """
    Load the saved running average of a VecNormalize wrapper, if available

    :param envs: (VecNormalize) the normalised envs
    :param load_path_normalise: (str) path to the saved running average, None to skip loading
    :return: (VecNormalize)
    """
    if load_path_normalise is not None:
        try:
            printGreen("Loading saved running average")
            envs.load_running_average(load_path_normalise)
            envs.training = False
        except FileNotFoundError:
            envs.training = True
            printYellow("Running Average files not found for VecNormalize, switching to training mode")
    return envs
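# Hedged usage sketch, assuming the stable-baselines 2.x VecNormalize API this
# repo relies on. "CartPole-v1" and the log path are placeholders: any env and
# any folder holding a previously saved running average would do.
import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

envs = DummyVecEnv([lambda: gym.make("CartPole-v1")])
envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
envs = loadRunningAverage(envs, load_path_normalise="/tmp/gym/previous_run/")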
def __init__(self, name, max_dist, state_dim=-1, globals_=None, learn_every=3, learn_states=False,
             path='data/', relative_pos=False):
    super(EpisodeSaver, self).__init__()
    self.name = name
    self.data_folder = path + name
    self.path = path
    try:
        os.makedirs(self.data_folder)
    except OSError:
        printYellow("Folder already exists")

    self.actions = []
    self.actions_proba = []
    self.rewards = []
    self.images = []
    self.target_positions = []
    self.episode_starts = []
    self.ground_truth_states = []
    self.images_path = []
    self.episode_step = 0
    self.episode_idx = -1
    self.episode_folder = None
    self.episode_success = False
    self.state_dim = state_dim
    self.learn_states = learn_states
    self.learn_every = learn_every  # Every n episodes, learn a state representation
    self.srl_model_path = ""
    self.n_steps = 0
    self.max_steps = 10000

    self.dataset_config = {'relative_pos': relative_pos, 'max_dist': str(max_dist)}
    with open("{}/dataset_config.json".format(self.data_folder), "w") as f:
        json.dump(self.dataset_config, f)

    if globals_ is not None:
        # Save environments parameters
        with open("{}/env_globals.json".format(self.data_folder), "w") as f:
            json.dump(filterJSONSerializableObjects(globals_), f)

    if self.learn_states:
        self.socket_client = SRLClient(self.name)
        self.socket_client.waitForServer()
def __init__(self, log_folder):
    super(LogRLStates, self).__init__()

    self.log_folder = log_folder + 'log_srl/'
    try:
        os.makedirs(self.log_folder)
    except OSError:
        printYellow("Folder already exists")

    self.actions = []
    self.rewards = []
    self.states = []
    self.normalized_states = []
def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
    # Even though DeepQ is single-core only, we still need the pipe system
    # of the SRL model server to work
    if env_kwargs is not None and env_kwargs.get("use_srl", False):
        srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
def createEnvs(args, allow_early_resets=False, env_kwargs=None, load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path for loading the rolling average, None if not available or wanted.
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe

    envs = [makeEnv(args.env, args.seed, i, args.log_dir,
                    allow_early_resets=allow_early_resets, env_kwargs=env_kwargs)
            for i in range(args.num_cpu)]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

    return envs
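# Hedged usage sketch: a hand-built args object standing in for the argparse
# Namespace that train.py normally provides. All values below are placeholders
# and assume the corresponding env and SRL registries are importable.
from types import SimpleNamespace

args = SimpleNamespace(env="KukaButtonGymEnv-v0", seed=0, num_cpu=4, num_stack=1,
                       log_dir="/tmp/gym/", srl_model="raw_pixels")
envs = createEnvs(args, env_kwargs={"is_discrete": True})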
def __init__(self, name, env_name=None, path='data/'):
    super(EpisodeSaver, self).__init__()
    self.name = name
    self.data_folder = path + name
    self.path = path
    try:
        os.makedirs(self.data_folder)
    except OSError:
        printYellow("Folder already exists")

    self.actions = []
    self.rewards = []
    self.images = []
    self.episode_starts = []
    self.ground_truth_states = []
    self.images_path = []
    self.episode_step = 0
    self.episode_idx = -1
    self.episode_folder = None
    self.episode_success = False
    self.n_steps = 0
    self.env_name = env_name
# TODO: check that the parameters are the same between experiments
folders = []
other = []
train_args = {}
for folder in os.listdir(args.log_dir):
    path = "{}/{}/".format(args.log_dir, folder)
    env_globals = json.load(open(path + "env_globals.json", 'r'))
    train_args = json.load(open(path + "args.json", 'r'))
    if train_args["shape_reward"] == args.shape_reward:
        folders.append(path)
    else:
        other.append(path)

if len(folders) == 0 and len(other) == 0:
    printYellow("No experiment found. Is the folder path {} correct?".format(args.log_dir))
    exit()
elif len(folders) == 0:
    printYellow("No experiments found with the given criterion. However, {} experiments".format(len(other)) +
                " were found {} reward shaping. ".format("without" if args.shape_reward else "with") +
                "Did you mean {} the flag '--shape-reward'?".format("without" if args.shape_reward else "with"))
    exit()

srl_model = train_args['srl_model'] if train_args['srl_model'] != "" else "raw pixels"
if args.timesteps:
    title = srl_model + " [Timesteps]"
def main():
    # Global variables for callback
    global ENV_NAME, ALGO, ALGO_NAME, LOG_INTERVAL, VISDOM_PORT, viz
    global SAVE_INTERVAL, EPISODE_WINDOW, MIN_EPISODES_BEFORE_SAVE
    parser = argparse.ArgumentParser(description="Train script for RL algorithms")
    parser.add_argument('--algo', default='ppo2', choices=list(registered_rl.keys()), help='RL algo to use',
                        type=str)
    parser.add_argument('--env', type=str, help='environment ID', default='KukaButtonGymEnv-v0',
                        choices=list(registered_env.keys()))
    parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)')
    parser.add_argument('--episode-window', type=int, default=40,
                        help='Episode window for moving average plot (default: 40)')
    parser.add_argument('--log-dir', default='/tmp/gym/', type=str,
                        help='directory to save agent logs and model (default: /tmp/gym)')
    parser.add_argument('--num-timesteps', type=int, default=int(1e6))
    parser.add_argument('--srl-model', type=str, default='raw_pixels', choices=list(registered_srl.keys()),
                        help='SRL model to use')
    parser.add_argument('--num-stack', type=int, default=1, help='number of frames to stack (default: 1)')
    parser.add_argument('--action-repeat', type=int, default=1,
                        help='number of times an action will be repeated (default: 1)')
    parser.add_argument('--port', type=int, default=8097, help='visdom server port (default: 8097)')
    parser.add_argument('--no-vis', action='store_true', default=False, help='disables visdom visualization')
    parser.add_argument('--shape-reward', action='store_true', default=False,
                        help='Shape the reward (reward = - distance) instead of a sparse reward')
    parser.add_argument('-c', '--continuous-actions', action='store_true', default=False)
    parser.add_argument('-joints', '--action-joints', action='store_true', default=False,
                        help='set actions to the joints of the arm directly, instead of inverse kinematics')
    parser.add_argument('-r', '--random-target', action='store_true', default=False,
                        help='Set the button to a random position')
    parser.add_argument('--srl-config-file', type=str, default="config/srl_models.yaml",
                        help='Set the location of the SRL model path configuration.')
    parser.add_argument('--hyperparam', type=str, nargs='+', default=[])
    parser.add_argument('--min-episodes-save', type=int, default=100,
                        help="Min number of episodes before saving best model")
    parser.add_argument('--latest', action='store_true', default=False,
                        help='load the latest learned model (location: srl_zoo/logs/DatasetName/)')
    parser.add_argument('--load-rl-model-path', type=str, default=None,
                        help="load the trained RL model, should be with the same algorithm type")
    parser.add_argument('-sc', '--simple-continual', action='store_true', default=False,
                        help='Simple red square target for task 1 of continual learning scenario. ' +
                             'The task is: robot should reach the target.')
    parser.add_argument('-cc', '--circular-continual', action='store_true', default=False,
                        help='Blue square target for task 2 of continual learning scenario. ' +
                             'The task is: robot should turn in circle around the target.')
    parser.add_argument('-sqc', '--square-continual', action='store_true', default=False,
                        help='Green square target for task 3 of continual learning scenario. ' +
                             'The task is: robot should turn in square around the target.')
    parser.add_argument('-ec', '--eight-continual', action='store_true', default=False,
                        help='Green square target for task 4 of continual learning scenario. ' +
                             'The task is: robot should do an eight with the target as the center of the shape.')
    parser.add_argument('--teacher-data-folder', type=str, default="",
                        help='Dataset folder of the teacher(s) policy(ies)', required=False)
    parser.add_argument('--epochs-distillation', type=int, default=30, metavar='N',
                        help='number of epochs to train for distillation (default: 30)')
    parser.add_argument('--distillation-training-set-size', type=int, default=-1,
                        help='Limit size (number of samples) of the training set (default: -1)')
    parser.add_argument('--perform-cross-evaluation-cc', action='store_true', default=False,
                        help='A cross evaluation from the latest stored model to all tasks')
    parser.add_argument('--eval-episode-window', type=int, default=400, metavar='N',
                        help='Episode window for saving each policy checkpoint for future distillation '
                             '(default: 400)')
    parser.add_argument('--new-lr', type=float, default=1.e-4,
                        help="New learning rate ratio to train a pretrained agent")
    parser.add_argument('--img-shape', type=str, default="(3,64,64)", help="Image shape of environment.")
    parser.add_argument("--gpu-num", help="Choose the GPU to use (CUDA_VISIBLE_DEVICES).", type=str, default="1",
                        choices=["0", "1", "2", "3", "5", "6", "7", "8"])
    parser.add_argument("--srl-model-path", help="SRL model weights path", type=str, default=None)
    parser.add_argument("--relative-pos", action='store_true', default=False,
                        help="For 'ground_truth': use relative position or not.")

    # Ignore unknown args for now
    args, unknown = parser.parse_known_args()
    # os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    env_kwargs = {}

    if args.img_shape is None:
        img_shape = None  # (3, 224, 224)
    else:
        img_shape = tuple(map(int, args.img_shape[1:-1].split(",")))
    env_kwargs['img_shape'] = img_shape

    # LOAD SRL models list
    assert os.path.exists(args.srl_config_file), \
        "Error: cannot load \"--srl-config-file {}\", file not found!".format(args.srl_config_file)
    with open(args.srl_config_file, 'rb') as f:
        all_models = yaml.load(f)

    # Sanity check
    assert args.episode_window >= 1, "Error: --episode-window cannot be less than 1"
    assert args.num_timesteps >= 1, "Error: --num-timesteps cannot be less than 1"
    assert args.num_stack >= 1, "Error: --num-stack cannot be less than 1"
    assert args.action_repeat >= 1, "Error: --action-repeat cannot be less than 1"
    assert 0 <= args.port <= 65535, "Error: invalid visdom port number {}, ".format(args.port) + \
        "port number must be an unsigned 16bit number [0,65535]."
    assert registered_srl[args.srl_model][0] == SRLType.ENVIRONMENT or args.env in all_models, \
        "Error: the environment {} has no srl_model defined in 'srl_models.yaml'. Cannot continue.".format(args.env)

    # check that the given srl_model can be run on the environment
    if registered_srl[args.srl_model][1] is not None:
        found = False
        for compatible_class in registered_srl[args.srl_model][1]:
            if issubclass(compatible_class, registered_env[args.env][0]):
                found = True
                break
        assert found, "Error: srl_model {}, is not compatible with the {} environment.".format(args.srl_model,
                                                                                               args.env)

    assert not (sum([args.simple_continual, args.circular_continual, args.square_continual, args.eight_continual])
                > 1 and args.env == "OmnirobotEnv-v0"), \
        "For continual SRL and RL, please provide only one scenario at a time and use the OmnirobotEnv-v0 environment!"

    assert not (args.algo == "distillation" and (args.teacher_data_folder == '' or args.continuous_actions is True)), \
        "For performing policy distillation, make sure to specify a valid teacher dataset and discrete actions!"

    ENV_NAME = args.env
    ALGO_NAME = args.algo
    VISDOM_PORT = args.port
    EPISODE_WINDOW = args.episode_window
    MIN_EPISODES_BEFORE_SAVE = args.min_episodes_save
    CROSS_EVAL = args.perform_cross_evaluation_cc
    EPISODE_WINDOW_DISTILLATION_WIN = args.eval_episode_window
    NEW_LR = args.new_lr
    print("EPISODE_WINDOW_DISTILLATION_WIN: ", EPISODE_WINDOW_DISTILLATION_WIN)

    if args.no_vis:
        viz = False

    algo_class, algo_type, action_type = registered_rl[args.algo]
    algo = algo_class()
    ALGO = algo

    # if callback frequency needs to be changed
    LOG_INTERVAL = algo.LOG_INTERVAL
    SAVE_INTERVAL = algo.SAVE_INTERVAL

    if not args.continuous_actions and ActionType.DISCRETE not in action_type:
        raise ValueError(args.algo + " does not support discrete actions, please use the '--continuous-actions' " +
                         "(or '-c') flag.")
    if args.continuous_actions and ActionType.CONTINUOUS not in action_type:
        raise ValueError(args.algo + " does not support continuous actions, please remove the " +
                         "'--continuous-actions' (or '-c') flag.")

    env_kwargs["is_discrete"] = not args.continuous_actions

    printGreen("\nAgent = {} \n".format(args.algo))

    env_kwargs["action_repeat"] = args.action_repeat
    # Random init position for button
    env_kwargs["random_target"] = args.random_target

    # In the simple continual scenario, the target should be initialized randomly.
    if args.simple_continual:
        env_kwargs["random_target"] = True

    # Allow up action
    # env_kwargs["force_down"] = False

    # allow multi-view
    env_kwargs['multi_view'] = args.srl_model == "multi_view_srl"
    parser = algo.customArguments(parser)
    args = parser.parse_args()

    args, env_kwargs = configureEnvAndLogFolder(args, env_kwargs, all_models)
    args_dict = filterJSONSerializableObjects(vars(args))
    # Save args
    with open(LOG_DIR + "args.json", "w") as f:
        json.dump(args_dict, f)

    env_class = registered_env[args.env][0]
    # env default kwargs
    default_env_kwargs = {k: v.default
                          for k, v in inspect.signature(env_class.__init__).parameters.items()
                          if v is not None}

    globals_env_param = sys.modules[env_class.__module__].getGlobals()
    # HACK: override the rendered image shape
    globals_env_param['RENDER_HEIGHT'] = img_shape[1]
    globals_env_param['RENDER_WIDTH'] = img_shape[2]
    globals_env_param['RELATIVE_POS'] = args.relative_pos

    super_class = registered_env[args.env][1]
    # recursive search through all the super classes of the asked environment, in order to get all the arguments.
    rec_super_class_lookup = {dict_class: dict_super_class
                              for _, (dict_class, dict_super_class, _, _) in registered_env.items()}
    while super_class != SRLGymEnv:
        assert super_class in rec_super_class_lookup, "Error: could not find super class of {}".format(super_class) + \
            ", are you sure \"registered_env\" is correctly defined?"
        super_env_kwargs = {k: v.default
                            for k, v in inspect.signature(super_class.__init__).parameters.items()
                            if v is not None}
        default_env_kwargs = {**super_env_kwargs, **default_env_kwargs}

        globals_env_param = {**sys.modules[super_class.__module__].getGlobals(), **globals_env_param}
        super_class = rec_super_class_lookup[super_class]

    # Print Variables
    printYellow("Arguments:")
    pprint(args_dict)
    printYellow("Env Globals:")
    pprint(filterJSONSerializableObjects({**globals_env_param, **default_env_kwargs, **env_kwargs}))
    # Save env params
    saveEnvParams(globals_env_param, {**default_env_kwargs, **env_kwargs})
    # Seed tensorflow, python and numpy random generator
    set_global_seeds(args.seed)
    # Augment the number of timesteps (when using multiprocessing this number is not always reached)
    args.num_timesteps = int(1.1 * args.num_timesteps)

    # Get the hyperparameters, if given (Hyperband)
    hyperparams = {param.split(":")[0]: param.split(":")[1] for param in args.hyperparam}
    hyperparams = algo.parserHyperParam(hyperparams)

    if args.load_rl_model_path is not None:
        # use a small learning rate
        print("use a small learning rate: {:f}".format(1.0e-4))
        hyperparams["learning_rate"] = lambda f: f * 1.0e-4

    # Train the agent
    if args.load_rl_model_path is not None:
        algo.setLoadPath(args.load_rl_model_path)
    algo.train(args, callback, env_kwargs=env_kwargs, train_kwargs=hyperparams)
def main():
    parser = argparse.ArgumentParser(description='Deterministic dataset generator for SRL training ' +
                                                 '(can be used for environment testing)')
    parser.add_argument('--num-cpu', type=int, default=1, help='number of cpus to run on')
    parser.add_argument('--num-episode', type=int, default=50, help='number of episodes to run')
    parser.add_argument('--save-path', type=str, default='srl_zoo/data/',
                        help='Folder where the environments will save the output')
    parser.add_argument('--name', type=str, default='kuka_button', help='Folder name for the output')
    parser.add_argument('--env', type=str, default='KukaButtonGymEnv-v0', help='The environment wanted',
                        choices=list(registered_env.keys()))
    parser.add_argument('--display', action='store_true', default=False)
    parser.add_argument('--no-record-data', action='store_true', default=False)
    parser.add_argument('--max-distance', type=float, default=0.28,
                        help='Beyond this distance from the goal, the agent gets a negative reward')
    parser.add_argument('-c', '--continuous-actions', action='store_true', default=False)
    parser.add_argument('--seed', type=int, default=0, help='the seed')
    parser.add_argument('-f', '--force', action='store_true', default=False,
                        help='Force the save, even if it overrides something else,' +
                             ' including partial parts if they exist')
    parser.add_argument('-r', '--random-target', action='store_true', default=False,
                        help='Set the button to a random position')
    parser.add_argument('--multi-view', action='store_true', default=False,
                        help='Set a second camera to the scene')
    parser.add_argument('--shape-reward', action='store_true', default=False,
                        help='Shape the reward (reward = - distance) instead of a sparse reward')
    parser.add_argument('--reward-dist', action='store_true', default=False,
                        help='Prints out the reward distribution when the dataset generation is finished')
    parser.add_argument('--run-policy', type=str, default="random", choices=VALID_POLICIES,
                        help='Policy to run for data collection ' +
                             '(random, locally pretrained ppo2, pretrained custom policy)')
    parser.add_argument('--log-custom-policy', type=str, default='',
                        help='Logs of the custom pretrained policy to run for data collection')
    parser.add_argument('--latest', action='store_true', default=False,
                        help='load the latest learned model (location: args.log-custom-policy)')
    parser.add_argument('-rgm', '--replay-generative-model', type=str, default="", choices=['vae'],
                        help='Generative model to replay for generating a dataset '
                             '(for Continual Learning purposes)')
    parser.add_argument('--log-generative-model', type=str, default='',
                        help='Logs of the custom pretrained generative model to run for data collection')
    parser.add_argument('--ppo2-timesteps', type=int, default=1000,
                        help='number of timesteps to run PPO2 on before generating the dataset')
    parser.add_argument('--toward-target-timesteps-proportion', type=float, default=0.0,
                        help="proportion of timesteps that use a simple toward-target policy, "
                             "should be 0.0 to 1.0")
    parser.add_argument('-sc', '--simple-continual', action='store_true', default=False,
                        help='Simple red square target for task 1 of continual learning scenario. ' +
                             'The task is: robot should reach the target.')
    parser.add_argument('-cc', '--circular-continual', action='store_true', default=False,
                        help='Blue square target for task 2 of continual learning scenario. ' +
                             'The task is: robot should turn in circle around the target.')
    parser.add_argument('-sqc', '--square-continual', action='store_true', default=False,
                        help='Green square target for task 3 of continual learning scenario. ' +
                             'The task is: robot should turn in square around the target.')
    parser.add_argument('--short-episodes', action='store_true', default=False,
                        help='Generate short episodes (only 10 contacts with the target allowed).')
    parser.add_argument('--episode', type=int, default=-1,
                        help='Model saved at episode N that we want to load')

    args = parser.parse_args()

    assert args.num_cpu > 0, "Error: number of cpus must be positive and non zero"
    assert args.max_distance > 0, "Error: max distance must be positive and non zero"
    assert args.num_episode > 0, "Error: number of episodes must be positive and non zero"
    assert not args.reward_dist or not args.shape_reward, \
        "Error: cannot display the reward distribution for continuous reward"
    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)
    if args.num_cpu > args.num_episode:
        args.num_cpu = args.num_episode
        printYellow("num_cpu cannot be greater than num_episode, defaulting to {} cpus.".format(args.num_cpu))

    assert sum([args.simple_continual, args.circular_continual, args.square_continual]) <= 1, \
        "For continual SRL and RL, please provide only one scenario at a time!"

    assert not (args.log_custom_policy == '' and args.run_policy in ['walker', 'custom']), \
        "If using a custom policy, please specify a valid log folder for loading it."

    assert not (args.log_generative_model == '' and args.replay_generative_model == 'custom'), \
        "If using a custom generative model, please specify a valid log folder for loading it."

    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # this is done so seed 0 and 1 are different, and not simply offsets of the same dataset.
    args.seed = np.random.RandomState(args.seed).randint(int(1e10))

    # File exists, need to deal with it
    if not args.no_record_data and os.path.exists(args.save_path + args.name):
        assert args.force, "Error: save directory '{}' already exists".format(args.save_path + args.name)

        shutil.rmtree(args.save_path + args.name)
        for part in glob.glob(args.save_path + args.name + "_part-[0-9]*"):
            shutil.rmtree(part)
    if not args.no_record_data:
        # create the output folder
        os.mkdir(args.save_path + args.name)

    if args.num_cpu == 1:
        env_thread(args, 0, partition=False)
    else:
        # try and divide into multiple processes, with an environment each
        try:
            jobs = []
            for i in range(args.num_cpu):
                process = multiprocessing.Process(target=env_thread, args=(args, i, True))
                jobs.append(process)

            for j in jobs:
                j.start()

            try:
                for j in jobs:
                    j.join()
            except Exception as e:
                printRed("Error: unable to join thread")
                raise e

        except Exception as e:
            printRed("Error: unable to start thread")
            raise e

    if not args.no_record_data and args.num_cpu > 1:
        # sleep 1 second, to avoid concurrency issues from multiprocessing (e.g., files still being written)
        time.sleep(1)
        # get all the parts
        file_parts = sorted(glob.glob(args.save_path + args.name + "_part-[0-9]*"),
                            key=lambda a: int(a.split("-")[-1]))

        # move the config files from any part, as they are identical
        os.rename(file_parts[0] + "/dataset_config.json", args.save_path + args.name + "/dataset_config.json")
        os.rename(file_parts[0] + "/env_globals.json", args.save_path + args.name + "/env_globals.json")

        ground_truth = None
        preprocessed_data = None

        # used to convert the part record_id to the fused record_id
        record_id = 0
        for part in file_parts:
            # sort the record names alphabetically, then numerically
            records = sorted(glob.glob(part + "/record_[0-9]*"), key=lambda a: int(a.split("_")[-1]))

            record_id_start = record_id
            for record in records:
                os.renames(record, args.save_path + args.name + "/record_{:03d}".format(record_id))
                record_id += 1

            # fuse the npz files together, in the right order
            if ground_truth is None:
                # init
                ground_truth = {}
                preprocessed_data = {}
                ground_truth_load = np.load(part + "/ground_truth.npz")
                preprocessed_data_load = np.load(part + "/preprocessed_data.npz")

                for arr in ground_truth_load.files:
                    if arr == "images_path":
                        ground_truth[arr] = np.array(
                            [convertImagePath(args, path, record_id_start) for path in ground_truth_load[arr]])
                    else:
                        ground_truth[arr] = ground_truth_load[arr]
                for arr in preprocessed_data_load.files:
                    preprocessed_data[arr] = preprocessed_data_load[arr]

            else:
                ground_truth_load = np.load(part + "/ground_truth.npz")
                preprocessed_data_load = np.load(part + "/preprocessed_data.npz")

                for arr in ground_truth_load.files:
                    if arr == "images_path":
                        sanitised_paths = np.array(
                            [convertImagePath(args, path, record_id_start) for path in ground_truth_load[arr]])
                        ground_truth[arr] = np.concatenate((ground_truth[arr], sanitised_paths))
                    else:
                        ground_truth[arr] = np.concatenate((ground_truth[arr], ground_truth_load[arr]))
                for arr in preprocessed_data_load.files:
                    preprocessed_data[arr] = np.concatenate((preprocessed_data[arr], preprocessed_data_load[arr]))

            # remove the current part folder
            shutil.rmtree(part)

        # save the fused outputs
        np.savez(args.save_path + args.name + "/ground_truth.npz", **ground_truth)
        np.savez(args.save_path + args.name + "/preprocessed_data.npz", **preprocessed_data)

    if args.reward_dist:
        rewards, counts = np.unique(np.load(args.save_path + args.name + "/preprocessed_data.npz")['rewards'],
                                    return_counts=True)
        counts = ["{:.2f}%".format(val * 100) for val in counts / np.sum(counts)]
        print("reward distribution:")
        [print(" ", reward, count) for reward, count in list(zip(rewards, counts))]
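# Tiny sketch of the npz fusing logic above, using in-memory dicts instead of
# files; the keys and values are toy placeholders. Each array is concatenated
# per key, which is exactly what the part-merging loop does for ground_truth
# and preprocessed_data.
import numpy as np

part_a = {"rewards": np.array([0, 1]), "images_path": np.array(["rec_000/frame0.jpg"])}
part_b = {"rewards": np.array([1, 0]), "images_path": np.array(["rec_001/frame0.jpg"])}
fused = {key: np.concatenate((part_a[key], part_b[key])) for key in part_a}
assert fused["rewards"].shape == (4,)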
def env_thread(args, thread_num, partition=True):
    """
    Run a session of an environment
    :param args: (ArgumentParser object)
    :param thread_num: (int) The thread ID of the environment session
    :param partition: (bool) If the output should be in multiple parts (default=True)
    """
    env_kwargs = {
        "max_distance": args.max_distance,
        "random_target": args.random_target,
        "force_down": True,
        "is_discrete": not args.continuous_actions,
        "renders": thread_num == 0 and args.display,
        "record_data": not args.no_record_data,
        "multi_view": args.multi_view,
        "save_path": args.save_path,
        "shape_reward": args.shape_reward,
        "simple_continual_target": args.simple_continual,
        "circular_continual_move": args.circular_continual,
        "square_continual_move": args.square_continual,
        "short_episodes": args.short_episodes
    }

    if partition:
        env_kwargs["name"] = args.name + "_part-" + str(thread_num)
    else:
        env_kwargs["name"] = args.name

    load_path, train_args, algo_name, algo_class = None, None, None, None
    model = None
    srl_model = None
    srl_state_dim = 0
    generated_obs = None
    env_norm = None

    if args.run_policy in ["walker", "custom"]:
        if args.latest:
            args.log_dir = latestPath(args.log_custom_policy)
        else:
            args.log_dir = args.log_custom_policy
        args.render = args.display
        args.plotting, args.action_proba = False, False

        train_args, load_path, algo_name, algo_class, _, env_kwargs_extra = loadConfigAndSetup(args)
        env_kwargs["srl_model"] = env_kwargs_extra["srl_model"]
        env_kwargs["random_target"] = env_kwargs_extra.get("random_target", False)
        env_kwargs["use_srl"] = env_kwargs_extra.get("use_srl", False)

        # TODO REFACTOR
        env_kwargs["simple_continual_target"] = env_kwargs_extra.get("simple_continual_target", False)
        env_kwargs["circular_continual_move"] = env_kwargs_extra.get("circular_continual_move", False)
        env_kwargs["square_continual_move"] = env_kwargs_extra.get("square_continual_move", False)
        env_kwargs["eight_continual_move"] = env_kwargs_extra.get("eight_continual_move", False)

        eps = 0.2
        env_kwargs["state_init_override"] = np.array([MIN_X + eps, MAX_X - eps]) \
            if args.run_policy == 'walker' else None
        if env_kwargs["use_srl"]:
            env_kwargs["srl_model_path"] = env_kwargs_extra.get("srl_model_path", None)
            env_kwargs["state_dim"] = getSRLDim(env_kwargs_extra.get("srl_model_path", None))
            srl_model = MultiprocessSRLModel(num_cpu=args.num_cpu, env_id=args.env, env_kwargs=env_kwargs)
            env_kwargs["srl_pipe"] = srl_model.pipe

    env_class = registered_env[args.env][0]
    env = env_class(**env_kwargs)

    if env_kwargs.get('srl_model', None) not in ["raw_pixels", None]:
        # TODO: Remove env duplication
        # This is a dirty trick to normalize the obs.
        # Since we override the SRL environment functions (step, reset) for on-policy generation
        # & generative replay, using stable-baselines' normalisation wrappers (step & reset) breaks...
        env_norm = [makeEnv(args.env, args.seed, i, args.log_dir, allow_early_resets=False,
                            env_kwargs=env_kwargs)
                    for i in range(args.num_cpu)]
        env_norm = DummyVecEnv(env_norm)
        env_norm = VecNormalize(env_norm, norm_obs=True, norm_reward=False)
        env_norm = loadRunningAverage(env_norm, load_path_normalise=args.log_custom_policy)

    using_real_omnibot = args.env == "OmnirobotEnv-v0" and USING_OMNIROBOT

    walker_path = None
    action_walker = None
    state_init_for_walker = None
    kwargs_reset, kwargs_step = {}, {}

    if args.run_policy in ['custom', 'ppo2', 'walker']:
        # Additional env when using a trained agent to generate data
        train_env = vecEnv(env_kwargs, env_class)

        if args.run_policy == 'ppo2':
            model = PPO2(CnnPolicy, train_env).learn(args.ppo2_timesteps)
        else:
            _, _, algo_args = createEnv(args, train_args, algo_name, algo_class, env_kwargs)
            tf.reset_default_graph()
            set_global_seeds(args.seed % 2 ** 32)
            printYellow("Compiling Policy function....")
            model = algo_class.load(load_path, args=algo_args)
            if args.run_policy == 'walker':
                walker_path = walkerPath()

    if len(args.replay_generative_model) > 0:
        srl_model = loadSRLModel(args.log_generative_model, th.cuda.is_available())
        srl_state_dim = srl_model.state_dim
        srl_model = srl_model.model.model

    frames = 0
    start_time = time.time()

    # divide evenly, then do an extra one for only some of them in order to get the right count
    for i_episode in range(args.num_episode // args.num_cpu + 1 * (args.num_episode % args.num_cpu > thread_num)):
        # seed + position in this slice + size of slice (with remainder if uneven partitions)
        seed = args.seed + i_episode + args.num_episode // args.num_cpu * thread_num + \
            (thread_num if thread_num <= args.num_episode % args.num_cpu else args.num_episode % args.num_cpu)
        seed = seed % 2 ** 32

        if not (args.run_policy in ['custom', 'walker']):
            env.seed(seed)
            env.action_space.seed(seed)  # this is for the sample() function from gym.space

        if len(args.replay_generative_model) > 0:
            sample = Variable(th.randn(1, srl_state_dim))
            if th.cuda.is_available():
                sample = sample.cuda()

            generated_obs = srl_model.decode(sample)
            generated_obs = generated_obs[0].detach().cpu().numpy()
            generated_obs = deNormalize(generated_obs)

        kwargs_reset['generated_observation'] = generated_obs
        obs = env.reset(**kwargs_reset)
        done = False
        action_proba = None
        t = 0
        episode_toward_target_on = False

        while not done:
            env.render()

            # Policy to run on the fly - to be trained before generation
            if args.run_policy == 'ppo2':
                action, _ = model.predict([obs])
            # Custom pre-trained Policy (SRL or End-to-End)
            elif args.run_policy in ['custom', 'walker']:
                obs = env_norm._normalize_observation(obs)
                action = [model.getAction(obs, done)]
                action_proba = model.getActionProba(obs, done)
                if args.run_policy == 'walker':
                    action_walker = np.array(walker_path[t])
            # Random Policy
            else:
                # Using a target reaching policy (untrained, from camera) when collecting data from real OmniRobot
                if episode_toward_target_on and np.random.rand() < args.toward_target_timesteps_proportion and \
                        using_real_omnibot:
                    action = [env.actionPolicyTowardTarget()]
                else:
                    action = [env.action_space.sample()]

            # Generative replay +/- for on-policy action
            if len(args.replay_generative_model) > 0:
                if args.run_policy == 'custom':
                    obs = obs.reshape(1, srl_state_dim)
                    obs = th.from_numpy(obs.astype(np.float32)).cuda()
                    z = obs
                    generated_obs = srl_model.decode(z)
                else:
                    sample = Variable(th.randn(1, srl_state_dim))
                    if th.cuda.is_available():
                        sample = sample.cuda()
                    generated_obs = srl_model.decode(sample)
                generated_obs = generated_obs[0].detach().cpu().numpy()
                generated_obs = deNormalize(generated_obs)

            action_to_step = action[0]
            kwargs_step = {k: v for (k, v) in [("generated_observation", generated_obs),
                                               ("action_proba", action_proba),
                                               ("action_grid_walker", action_walker)] if v is not None}

            obs, _, done, _ = env.step(action_to_step, **kwargs_step)

            frames += 1
            t += 1
            if done:
                if np.random.rand() < args.toward_target_timesteps_proportion and using_real_omnibot:
                    episode_toward_target_on = True
                else:
                    episode_toward_target_on = False
                print("Episode finished after {} timesteps".format(t + 1))

    if thread_num == 0:
        print("{:.2f} FPS".format(frames * args.num_cpu / (time.time() - start_time)))
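# Precedence note for the seed wrap-around above: in Python, ``%`` binds tighter
# than ``^``, so ``seed % 2 ^ 32`` would compute (seed % 2) XOR 32 instead of a
# modulo. The intended 32-bit wrap therefore needs ``**``. Quick self-check:
seed = 123456789
assert seed % 2 ^ 32 == (seed % 2) ^ 32    # XOR of the parity bit: 33 here, not a modulo
assert seed % 2 ** 32 == seed % (2 ** 32)  # modulo 2**32, the intended wrap-around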
def GatherExperiments(folders, algo, window=40, title="", min_num_x=-1, timesteps=False, output_file=""):
    """
    Compute the mean and standard error over several experiments, for the learning curve

    :param folders: ([str]) Log folders, where the monitor.csv are stored
    :param algo: (str) name of the RL algo
    :param window: (int) Smoothing window
    :param title: (str) plot title
    :param min_num_x: (int) Minimum number of episodes/timesteps to keep an experiment (default: -1, no minimum)
    :param timesteps: (bool) Plot timesteps instead of episodes
    :param output_file: (str) Path to a file where the plot data will be saved
    :return: (np.ndarray, np.ndarray) the common x values and the smoothed y values of each experiment
    """
    y_list = []
    x_list = []
    ok = False
    for folder in folders:
        if timesteps:
            x, y = loadData(folder, smooth=1, bin_size=100)
            if x is not None:
                x, y = np.array(x), np.array(y)
        else:
            x, y = loadEpisodesData(folder)

        if x is None or (min_num_x > 0 and y.shape[0] < min_num_x):
            printRed("Skipping {}".format(folder))
            continue

        if y.shape[0] <= window:
            printYellow("Folder {}".format(folder))
            printRed("Not enough episodes for current window size = {}".format(window))
            continue
        ok = True
        y = movingAverage(y, window)
        y_list.append(y)
        print(len(x))
        # Truncate x
        x = x[len(x) - len(y):]
        x_list.append(x)

    if not ok:
        printRed("Not enough data to plot anything with current config." +
                 " Consider decreasing --min-x")
        return

    lengths = list(map(len, x_list))
    min_x, max_x = np.min(lengths), np.max(lengths)

    print("Min x: {}".format(min_x))
    print("Max x: {}".format(max_x))

    for i in range(len(x_list)):
        x_list[i] = x_list[i][:min_x]
        y_list[i] = y_list[i][:min_x]

    x = np.array(x_list)[0]
    y = np.array(y_list)
    # if output_file != "":
    #     printGreen("Saving aggregated data to {}.npz".format(output_file))
    #     np.savez(output_file, x=x, y=y)
    return x, y
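# Hedged plotting sketch: the folder names are placeholders, and the guard is
# needed because GatherExperiments returns None when no run has enough data.
# y has shape (n_runs, n_points), so the mean/std are taken over axis 0.
import matplotlib.pyplot as plt

result = GatherExperiments(["logs/exp_1/", "logs/exp_2/"], "ppo2", window=40, timesteps=True)
if result is not None:
    x, y = result
    plt.plot(x, y.mean(axis=0), label="mean over runs")
    plt.fill_between(x, y.mean(axis=0) - y.std(axis=0), y.mean(axis=0) + y.std(axis=0), alpha=0.3)
    plt.legend()
    plt.show()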
def loadSRLModel(path=None, cuda=False, state_dim=None, env_object=None, img_shape=None):
    """
    Load a trained SRL model, it will try to guess the model type from the path
    :param path: (str) Path to a srl model
    :param cuda: (bool)
    :param state_dim: (int)
    :param env_object: (gym env object)
    :param img_shape: (tuple) the shape of the input images
    :return: (srl model)
    """
    model_type, losses, n_actions, model = None, None, None, None

    if path is not None:
        # Get path to the log folder
        log_folder = '/'.join(path.split('/')[:-1]) + '/'
        with open(log_folder + 'exp_config.json', 'r') as f:
            # IMPORTANT: keep the order for the losses
            # so the json is loaded as an OrderedDict
            exp_config = json.load(f, object_pairs_hook=OrderedDict)

        state_dim = exp_config.get('state-dim', None)
        losses = exp_config.get('losses', None)  # None in the case of baseline models (pca)
        n_actions = exp_config.get('n_actions', None)  # None in the case of baseline models (pca)
        model_type = exp_config.get('model-type', None)
        use_multi_view = exp_config.get('multi-view', False)
        inverse_model_type = exp_config.get('inverse-model-type', 'linear')
        num_dataset_episodes = exp_config.get('num_dataset_episodes', 100)
        assert state_dim is not None, \
            "Please make sure you are loading an up-to-date model with a valid exp_config file."
        split_dimensions = exp_config.get('split-dimensions')
        if isinstance(split_dimensions, OrderedDict):
            n_dims = sum(split_dimensions.values())
            # Combine losses instead of splitting
            if n_dims == 0:
                split_dimensions = None
    else:
        assert env_object is not None or state_dim > 0, \
            "When learning states, state_dim must be > 0. Otherwise, set SRL_MODEL_PATH " \
            "to a srl_model.pth file with learned states."

    if path is not None:
        if 'baselines' in path:
            if 'pca' in path:
                model_type = 'pca'
                model = SRLPCA(state_dim)

    assert model_type is not None or model is not None, \
        "Model type not supported. In order to use loadSRLModel, a path to an SRL model must be given."
    assert not (losses is None and not model_type == 'pca'), \
        "Please make sure you are loading an up-to-date model with a valid exp_config file."
    assert not (n_actions is None and not (model_type == 'pca')), \
        "Please make sure you are loading an up-to-date model with a valid exp_config file."

    if model is None:
        if use_multi_view:
            new_img_shape = (6,) + img_shape[1:]
        else:
            new_img_shape = img_shape
        model = SRLNeuralNetwork(state_dim, cuda, img_shape=new_img_shape, model_type=model_type,
                                 n_actions=n_actions, losses=losses, split_dimensions=split_dimensions,
                                 spcls_num_classes=num_dataset_episodes, inverse_model_type=inverse_model_type)

    model_name = model_type
    if 'baselines' not in path:
        model_name += " with " + ", ".join(losses)
    printGreen("\nSRL: Using {} \n".format(model_name))

    if path is not None:
        printYellow("Loading trained model...{}".format(path))
        model.load(path)
    return model
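# Hedged loading sketch: the checkpoint path below is a placeholder; the folder
# containing the .pth file must also hold the exp_config.json written at SRL
# training time, from which the model type and losses are recovered.
srl_model = loadSRLModel(path="srl_zoo/logs/kuka_dataset/19-01-01_12h00_00_custom_cnn/srl_model.pth",
                         cuda=False, img_shape=(3, 64, 64))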
def main():
    # Global variables for callback
    parser = argparse.ArgumentParser(description="Evaluation script for distillation from two teacher policies")
    parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)')
    parser.add_argument('--env', type=str, help='environment ID', default='OmnirobotEnv-v0',
                        choices=list(registered_env.keys()))
    parser.add_argument('--episode_window', type=int, default=40,
                        help='Episode window for moving average plot (default: 40)')
    parser.add_argument('--log-dir-teacher-one', default='/tmp/gym/', type=str,
                        help='directory to load an optimal agent for task 1 (default: /tmp/gym)')
    parser.add_argument('--log-dir-teacher-two', default='/tmp/gym/', type=str,
                        help='directory to load an optimal agent for task 2 (default: /tmp/gym)')
    parser.add_argument('--log-dir-student', default='/tmp/gym/', type=str,
                        help='directory to save the student agent logs and model (default: /tmp/gym)')
    parser.add_argument('--srl-config-file-one', type=str, default="config/srl_models_one.yaml",
                        help='Set the location of the SRL model path configuration.')
    parser.add_argument('--srl-config-file-two', type=str, default="config/srl_models_two.yaml",
                        help='Set the location of the SRL model path configuration.')
    parser.add_argument('--epochs-distillation', type=int, default=30, metavar='N',
                        help='number of epochs to train for distillation (default: 30)')
    parser.add_argument('--distillation-training-set-size', type=int, default=-1,
                        help='Limit size (number of samples) of the training set (default: -1)')
    parser.add_argument('--eval-tasks', type=str, nargs='+', default=['cc', 'sqc', 'sc'],
                        help='A cross evaluation from the latest stored model to all tasks')
    parser.add_argument('--continual-learning-labels', type=str, nargs=2, metavar=('label_1', 'label_2'),
                        default=argparse.SUPPRESS,
                        help='Labels for the continual learning RL distillation task.')
    parser.add_argument('--student-srl-model', type=str, default='raw_pixels', choices=list(registered_srl.keys()),
                        help='SRL model to use for the student RL policy')
    parser.add_argument('--epochs-teacher-datasets', type=int, default=30, metavar='N',
                        help='number of epochs for generating both RL teacher datasets (default: 30)')
    parser.add_argument('--num-iteration', type=int, default=1,
                        help='number of times each algorithm should be run for the eval (N seeds).')
    parser.add_argument('--eval-episode-window', type=int, default=400, metavar='N',
                        help='Episode window for saving each policy checkpoint for future distillation '
                             '(default: 400)')

    args, unknown = parser.parse_known_args()

    if 'continual_learning_labels' in args:
        assert args.continual_learning_labels[0] in CONTINUAL_LEARNING_LABELS \
            and args.continual_learning_labels[1] in CONTINUAL_LEARNING_LABELS, \
            "Please specify a valid Continual learning label for each dataset to be used for RL distillation!"
    print(args.continual_learning_labels)
    assert os.path.exists(args.srl_config_file_one), \
        "Error: cannot load \"--srl-config-file {}\", file not found!".format(args.srl_config_file_one)
    assert os.path.exists(args.srl_config_file_two), \
        "Error: cannot load \"--srl-config-file {}\", file not found!".format(args.srl_config_file_two)
    if not (args.log_dir_teacher_one == "None"):
        assert os.path.exists(args.log_dir_teacher_one), \
            "Error: cannot load \"--log-dir-teacher-one {}\", folder not found!".format(args.log_dir_teacher_one)
    assert os.path.exists(args.log_dir_teacher_two), \
        "Error: cannot load \"--log-dir-teacher-two {}\", folder not found!".format(args.log_dir_teacher_two)

    teacher_pro = args.log_dir_teacher_one
    teacher_learn = args.log_dir_teacher_two

    # The output paths for the generated teacher datasets
    teacher_pro_data = args.continual_learning_labels[0] + '/'
    teacher_learn_data = args.continual_learning_labels[1] + '/'
    merge_path = "data/on_policy_merged"
    print(teacher_pro_data, teacher_learn_data)
    episodes, policy_path = allPolicy(teacher_learn)

    rewards_at_episode = {}
    episodes_to_test = [e for e in episodes
                        if (int(e) < 2000 and int(e) % 200 == 0) or (int(e) > 2000 and int(e) % 1000 == 0)]

    # Generate data from the professional teacher
    printYellow("\nGenerating on-policy data from the optimal teacher: " + args.continual_learning_labels[0])
    if not (args.log_dir_teacher_one == "None"):
        OnPolicyDatasetGenerator(teacher_pro, args.continual_learning_labels[0] + '_copy/',
                                 task_id=args.continual_learning_labels[0],
                                 num_eps=args.epochs_teacher_datasets, episode=-1, env_name=args.env)
    print("Eval on eps list: ", episodes_to_test)
    for eps in episodes_to_test:
        student_path = args.log_dir_student
        printBlue("\n\nEvaluation at episode " + str(eps))

        if not (args.log_dir_teacher_one == "None"):
            # Use a copy of the optimal teacher
            ok = subprocess.call(['cp', '-r', 'data/' + args.continual_learning_labels[0] + '_copy/',
                                  'data/' + teacher_pro_data, '-f'])
            assert ok == 0
            time.sleep(2)

        # Generate data from the learning teacher
        printYellow("\nGenerating on-policy data from the learning teacher: " + args.continual_learning_labels[1])
        OnPolicyDatasetGenerator(teacher_learn, teacher_learn_data, task_id=args.continual_learning_labels[1],
                                 episode=eps, num_eps=args.epochs_teacher_datasets, env_name=args.env)

        if args.log_dir_teacher_one == "None":
            merge_path = 'data/' + teacher_learn_data
            ok = subprocess.call(['cp', '-r', merge_path, 'srl_zoo/data/', '-f'])
        else:
            # merge the data
            mergeData('data/' + teacher_pro_data, 'data/' + teacher_learn_data, merge_path, force=True)
            ok = subprocess.call(['cp', '-r', 'data/on_policy_merged/', 'srl_zoo/data/', '-f'])
        assert ok == 0
        time.sleep(2)

        # Train a policy with distillation on the merged teacher datasets
        trainStudent('srl_zoo/' + merge_path, args.continual_learning_labels[1],
                     yaml_file=args.srl_config_file_one,
                     log_dir=args.log_dir_student,
                     srl_model=args.student_srl_model,
                     env_name=args.env,
                     training_size=args.distillation_training_set_size,
                     epochs=args.epochs_distillation)
        student_path += args.env + '/' + args.student_srl_model + "/distillation/"
        latest_student_path = max([student_path + "/" + d for d in os.listdir(student_path)
                                   if os.path.isdir(student_path + "/" + d)], key=os.path.getmtime) + '/'
        rewards = {}
        printRed("\nSaving the student at path: " + latest_student_path)
        for task_label in ["-sc", "-cc"]:
            rewards[task_label] = []

            for seed_i in range(args.num_iteration):
                printYellow("\nEvaluating student on task: " + task_label + " for seed: " + str(seed_i))
                command_line_enjoy_student = ['python', '-m', 'replay.enjoy_baselines',
                                              '--num-timesteps', '251', '--log-dir', latest_student_path,
                                              task_label, "--seed", str(seed_i)]
                ok = subprocess.check_output(command_line_enjoy_student)
                ok = ok.decode('utf-8')
                str_before = "Mean reward: "
                str_after = "\npybullet"
                idx_before = ok.find(str_before) + len(str_before)
                idx_after = ok.find(str_after)
                seed_reward = float(ok[idx_before:idx_after])
                rewards[task_label].append(seed_reward)
        print("rewards at eps ", eps, ": ", rewards)
        rewards_at_episode[eps] = rewards
    print("All rewards: ", rewards_at_episode)
    json_dict = json.dumps(rewards_at_episode)
    json_dict_name = args.log_dir_student + "/reward_at_episode_" + \
        datetime.datetime.now().strftime("%y-%m-%d_%Hh%M_%S") + '.json'
    with open(json_dict_name, "w") as f:
        f.write(json_dict)
    printRed("\nSaving the evaluation at path: " + json_dict_name)
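# Minimal illustration of the stdout parsing used above, on a toy string
# (the number is fabricated for the example only): the mean reward is sliced
# out between the "Mean reward: " marker and the following pybullet banner.
out = "episode done\nMean reward: 42.5\npybullet build time: ..."
str_before, str_after = "Mean reward: ", "\npybullet"
reward = float(out[out.find(str_before) + len(str_before):out.find(str_after)])
assert reward == 42.5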
def main():
    parser = argparse.ArgumentParser(description="OpenAI RL Baselines Benchmark",
                                     epilog='After the arguments are parsed, the rest are assumed to be arguments'
                                            ' for rl_baselines.train')
    parser.add_argument('--algo', type=str, default='ppo2', help='OpenAI baseline to use',
                        choices=list(registered_rl.keys()))
    parser.add_argument('--env', type=str, nargs='+', default=["OmnirobotEnv-v0"], help='environment ID(s)',
                        choices=["OmnirobotEnv-v0"])  # list(registered_env.keys())
    parser.add_argument('--srl-model', type=str, nargs='+', default=["ground_truth"],
                        help='SRL model(s) to use', choices=list(registered_srl.keys()))
    parser.add_argument('--num-timesteps', type=int, default=1e6,
                        help='number of timesteps the baseline should run')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Display baseline STDOUT')
    parser.add_argument('--num-iteration', type=int, default=15,
                        help='number of times each algorithm should be run for each unique combination of'
                             ' environment and srl-model.')
    parser.add_argument('--seed', type=int, default=0,
                        help='initial seed for each unique combination of environment and srl-model.')
    parser.add_argument('--srl-config-file', nargs='+', type=str, default=["config/srl_models.yaml"],
                        help='Set the location of the SRL model path configuration.')
    parser.add_argument('--tasks', type=str, nargs='+', default=["cc"], help='The tasks for the robot',
                        choices=["cc", "ec", "sqc", "sc"])

    # returns the parsed arguments, and the rest are assumed to be arguments for rl_baselines.train
    args, train_args = parser.parse_known_args()

    # Sanity check
    assert args.num_timesteps >= 1, "Error: --num-timesteps cannot be less than 1"
    assert args.num_iteration >= 1, "Error: --num-iteration cannot be less than 1"

    # Removing duplicates and sorting
    srl_models = list(set(args.srl_model))
    envs = list(set(args.env))
    tasks = args.tasks
    srl_models.sort()
    envs.sort()
    tasks = ['-' + t for t in tasks]
    config_files = args.srl_config_file

    # LOAD SRL models list
    if len(config_files) == 1:
        printYellow("You are using the same config file: {} for all training tasks".format(config_files[0]))
        for i in range(len(tasks) - 1):
            config_files.append(config_files[0])
    else:
        assert len(config_files) == len(tasks), \
            "Error: {} config files given for {} tasks".format(len(config_files), len(tasks))

    for file in config_files:
        assert os.path.exists(file), \
            "Error: cannot load \"--srl-config-file {}\", file not found!".format(file)

    for file in config_files:
        with open(file, 'rb') as f:
            all_models = yaml.load(f)

        # Checking definition and presence of all requested srl_models
        valid = True
        for env in envs:
            # validate the env definition
            if env not in all_models:
                printRed("Error: '{}' is missing a definition for environment {}".format(file, env))
                valid = False
                continue  # skip to the next env, this one is not valid

            # checking log_folder for current env
            missing_log = "log_folder" not in all_models[env]
            if missing_log:
                printRed("Error: '{}' is missing a definition for log_folder in environment {}".format(file, env))
                valid = False

            # validate each model for the current env definition
            for model in srl_models:
                if registered_srl[model][0] == SRLType.ENVIRONMENT:
                    continue  # not an srl model, skip to the next model
                elif model not in all_models[env]:
                    printRed("Error: '{}' is missing srl_model {} for environment {}".format(file, model, env))
                    valid = False
                elif (not missing_log) and \
                        (not os.path.exists(all_models[env]["log_folder"] + all_models[env][model])):
                    # checking presence of srl_model path, if and only if log_folder exists
                    printRed("Error: srl_model {} for environment {} was defined in ".format(model, env) +
                             "'{}', however the file {} it was targeting does not exist.".format(
                                 file, all_models[env]["log_folder"] + all_models[env][model]))
                    valid = False

        assert valid, "Errors occurred due to malformed {}, cannot continue.".format(file)

    # check that all the SRL models can be run on all the environments
    valid = True
    for env in envs:
        for model in srl_models:
            if registered_srl[model][1] is not None:
                found = False
                for compatible_class in registered_srl[model][1]:
                    if issubclass(compatible_class, registered_env[env][0]):
                        found = True
                        break
                if not found:
                    valid = False
                    printRed("Error: srl_model {}, is not compatible with the {} environment.".format(model, env))
    assert valid, "Errors occurred due to an incompatible combination of srl_model and environment, cannot continue."

    # the seeds used in training the baseline.
    seeds = list(np.arange(args.num_iteration) + args.seed)

    if args.verbose:
        # None here means stdout of terminal for subprocess.call
        stdout = None
    else:
        stdout = open(os.devnull, 'w')

    printGreen("\nRunning {} benchmarks {} times...".format(args.algo, args.num_iteration))
    print("\nSRL-Models:\t{}".format(srl_models))
    print("environments:\t{}".format(envs))
    print("verbose:\t{}".format(args.verbose))
    print("timesteps:\t{}".format(args.num_timesteps))
    num_tasks = len(tasks)
    print(num_tasks)
    printGreen("The tasks that will be executed: {}".format(args.tasks))
    printGreen("with the following config files: {}".format(config_files))

    for model in srl_models:
        for env in envs:
            for iter_task in range(num_tasks):
                for i in range(args.num_iteration):
                    printGreen(
                        "\nIteration_num={} (seed: {}), Environment='{}', SRL-Model='{}', Task='{}', "
                        "Config_file='{}'".format(i, seeds[i], env, model, tasks[iter_task],
                                                  config_files[iter_task]))

                    # redefine the parsed args for rl_baselines.train
                    loop_args = ['--srl-model', model, '--seed', str(seeds[i]), '--algo', args.algo,
                                 '--env', env, '--num-timesteps', str(int(args.num_timesteps)),
                                 '--srl-config-file', config_files[iter_task], tasks[iter_task]]

                    ok = subprocess.call(['python', '-m', 'rl_baselines.train'] + train_args + loop_args,
                                         stdout=stdout)
                    if ok != 0:
                        # propagate the error to the terminal
                        raise ChildProcessError("An error occurred, error code: {}".format(ok))
def main(): # Global variables for callback global ENV_NAME, ALGO, ALGO_NAME, LOG_INTERVAL, VISDOM_PORT, viz global SAVE_INTERVAL, EPISODE_WINDOW, MIN_EPISODES_BEFORE_SAVE parser = argparse.ArgumentParser(description="Train script for RL algorithms") parser.add_argument('--algo', default='ppo2', choices=list(registered_rl.keys()), help='RL algo to use', type=str) parser.add_argument('--env', type=str, help='environment ID', default='KukaButtonGymEnv-v0', choices=list(registered_env.keys())) parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)') parser.add_argument('--episode_window', type=int, default=40, help='Episode window for moving average plot (default: 40)') parser.add_argument('--log-dir', default='/tmp/gym/', type=str, help='directory to save agent logs and model (default: /tmp/gym)') parser.add_argument('--num-timesteps', type=int, default=int(1e6)) parser.add_argument('--srl-model', type=str, default='raw_pixels', choices=list(registered_srl.keys()), help='SRL model to use') parser.add_argument('--num-stack', type=int, default=1, help='number of frames to stack (default: 1)') parser.add_argument('--action-repeat', type=int, default=1, help='number of times an action will be repeated (default: 1)') parser.add_argument('--port', type=int, default=8097, help='visdom server port (default: 8097)') parser.add_argument('--no-vis', action='store_true', default=False, help='disables visdom visualization') parser.add_argument('--shape-reward', action='store_true', default=False, help='Shape the reward (reward = - distance) instead of a sparse reward') parser.add_argument('-c', '--continuous-actions', action='store_true', default=False) parser.add_argument('-joints', '--action-joints', action='store_true', default=False, help='set actions to the joints of the arm directly, instead of inverse kinematics') parser.add_argument('-r', '--random-target', action='store_true', default=False, help='Set the button to a random position') parser.add_argument('--srl-config-file', type=str, default="config/srl_models.yaml", help='Set the location of the SRL model path configuration.') parser.add_argument('--hyperparam', type=str, nargs='+', default=[]) parser.add_argument('--min-episodes-save', type=int, default=100, help="Min number of episodes before saving best model") parser.add_argument('--latest', action='store_true', default=False, help='load the latest learned model (location:srl_zoo/logs/DatasetName/)') parser.add_argument('--load-rl-model-path', type=str, default=None, help="load the trained RL model, should be with the same algorithm type") # Ignore unknown args for now args, unknown = parser.parse_known_args() env_kwargs = {} # LOAD SRL models list assert os.path.exists(args.srl_config_file), \ "Error: cannot load \"--srl-config-file {}\", file not found!".format(args.srl_config_file) with open(args.srl_config_file, 'rb') as f: all_models = yaml.load(f) # Sanity check assert args.episode_window >= 1, "Error: --episode_window cannot be less than 1" assert args.num_timesteps >= 1, "Error: --num-timesteps cannot be less than 1" assert args.num_stack >= 1, "Error: --num-stack cannot be less than 1" assert args.action_repeat >= 1, "Error: --action-repeat cannot be less than 1" assert 0 <= args.port < 65535, "Error: invalid visdom port number {}, ".format(args.port) + \ "port number must be an unsigned 16bit number [0,65535]." 
assert registered_srl[args.srl_model][0] == SRLType.ENVIRONMENT or args.env in all_models, \ "Error: the environment {} has no srl_model defined in 'srl_models.yaml'. Cannot continue.".format(args.env) # check that the srl_model can be run on the environment if registered_srl[args.srl_model][1] is not None: found = False for compatible_class in registered_srl[args.srl_model][1]: if issubclass(compatible_class, registered_env[args.env][0]): found = True break assert found, "Error: srl_model {} is not compatible with the {} environment.".format(args.srl_model, args.env) ENV_NAME = args.env ALGO_NAME = args.algo VISDOM_PORT = args.port EPISODE_WINDOW = args.episode_window MIN_EPISODES_BEFORE_SAVE = args.min_episodes_save if args.no_vis: viz = False algo_class, algo_type, action_type = registered_rl[args.algo] algo = algo_class() ALGO = algo # if callback frequency needs to be changed LOG_INTERVAL = algo.LOG_INTERVAL SAVE_INTERVAL = algo.SAVE_INTERVAL if not args.continuous_actions and ActionType.DISCRETE not in action_type: raise ValueError(args.algo + " does not support discrete actions, please use the '--continuous-actions' " + "(or '-c') flag.") if args.continuous_actions and ActionType.CONTINUOUS not in action_type: raise ValueError(args.algo + " does not support continuous actions, please remove the '--continuous-actions' " + "(or '-c') flag.") env_kwargs["is_discrete"] = not args.continuous_actions printGreen("\nAgent = {} \n".format(args.algo)) env_kwargs["action_repeat"] = args.action_repeat # Random init position for button env_kwargs["random_target"] = args.random_target # Allow up action # env_kwargs["force_down"] = False # allow multi-view env_kwargs['multi_view'] = args.srl_model == "multi_view_srl" parser = algo.customArguments(parser) args = parser.parse_args() args, env_kwargs = configureEnvAndLogFolder(args, env_kwargs, all_models) args_dict = filterJSONSerializableObjects(vars(args)) # Save args with open(LOG_DIR + "args.json", "w") as f: json.dump(args_dict, f) env_class = registered_env[args.env][0] # env default kwargs default_env_kwargs = {k: v.default for k, v in inspect.signature(env_class.__init__).parameters.items() if v is not None} globals_env_param = sys.modules[env_class.__module__].getGlobals() super_class = registered_env[args.env][1] # recursive search through all the super classes of the requested environment, in order to get all the arguments rec_super_class_lookup = {dict_class: dict_super_class for _, (dict_class, dict_super_class, _, _) in registered_env.items()} while super_class != SRLGymEnv: assert super_class in rec_super_class_lookup, "Error: could not find super class of {}".format(super_class) + \ ", are you sure \"registered_env\" is correctly defined?" 
super_env_kwargs = {k: v.default for k, v in inspect.signature(super_class.__init__).parameters.items() if v is not None} default_env_kwargs = {**super_env_kwargs, **default_env_kwargs} globals_env_param = {**sys.modules[super_class.__module__].getGlobals(), **globals_env_param} super_class = rec_super_class_lookup[super_class] # Print Variables printYellow("Arguments:") pprint(args_dict) printYellow("Env Globals:") pprint(filterJSONSerializableObjects({**globals_env_param, **default_env_kwargs, **env_kwargs})) # Save env params saveEnvParams(globals_env_param, {**default_env_kwargs, **env_kwargs}) # Seed tensorflow, python and numpy random generators set_global_seeds(args.seed) # Augment the number of timesteps (when using multiprocessing this number is not reached) args.num_timesteps = int(1.1 * args.num_timesteps) # Get the hyperparameters, if given (Hyperband) hyperparams = {param.split(":")[0]: param.split(":")[1] for param in args.hyperparam} hyperparams = algo.parserHyperParam(hyperparams) if args.load_rl_model_path is not None: # use a small learning rate when fine-tuning a loaded model print("use a small learning rate: {:f}".format(1.0e-4)) hyperparams["learning_rate"] = lambda f: f * 1.0e-4 # Train the agent if args.load_rl_model_path is not None: algo.setLoadPath(args.load_rl_model_path) algo.train(args, callback, env_kwargs=env_kwargs, train_kwargs=hyperparams)
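# Hedged usage sketch for the --hyperparam flag parsed above: entries are "name:value"
# strings split on the colon, and algo.parserHyperParam() is then responsible for casting
# the string values. The values below are illustrative, not defaults:
#   python -m rl_baselines.train --algo ppo2 --hyperparam learning_rate:0.0003 gamma:0.995
example_hyperparam = ['learning_rate:0.0003', 'gamma:0.995']
parsed = {param.split(":")[0]: param.split(":")[1] for param in example_hyperparam}
assert parsed == {'learning_rate': '0.0003', 'gamma': '0.995'}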
def loadConfigAndSetup(load_args): """ Get the training config and set up the parameters :param load_args: (Arguments) :return: (dict, str, str, Class, str, dict) """ algo_name = "" for algo in list(registered_rl.keys()): if algo in load_args.log_dir: algo_name = algo break algo_class, algo_type, _ = registered_rl[algo_name] if algo_type == AlgoType.OTHER: raise ValueError(algo_name + " is not supported for replay") printGreen("\n" + algo_name + "\n") try: # If args contains episode information, this is for student_evaluation (distillation) if not load_args.episode == -1: load_path = "{}/{}_{}_model.pkl".format(load_args.log_dir, algo_name, load_args.episode) else: load_path = "{}/{}_model.pkl".format(load_args.log_dir, algo_name) except AttributeError: printYellow( "No episode checkpoint specified, falling back to the default policy model: {}_model.pkl".format(algo_name)) if load_args.log_dir[-3:] != 'pkl': load_path = "{}/{}_model.pkl".format(load_args.log_dir, algo_name) else: load_path = load_args.log_dir load_args.log_dir = os.path.dirname(load_path)+'/' env_globals = json.load(open(load_args.log_dir + "env_globals.json", 'r')) train_args = json.load(open(load_args.log_dir + "args.json", 'r')) env_kwargs = { "renders": load_args.render, "shape_reward": load_args.shape_reward, # Reward sparse or shaped "action_joints": train_args["action_joints"], "is_discrete": not train_args["continuous_actions"], "random_target": train_args.get('random_target', False), "srl_model": train_args["srl_model"] } # load it, if it was defined if "action_repeat" in env_globals: env_kwargs["action_repeat"] = env_globals['action_repeat'] # Remove up action if train_args["env"] == "Kuka2ButtonGymEnv-v0": env_kwargs["force_down"] = env_globals.get('force_down', True) else: env_kwargs["force_down"] = env_globals.get('force_down', False) if train_args["env"] == "OmnirobotEnv-v0": env_kwargs["simple_continual_target"] = env_globals.get("simple_continual_target", False) env_kwargs["circular_continual_move"] = env_globals.get("circular_continual_move", False) env_kwargs["square_continual_move"] = env_globals.get("square_continual_move", False) env_kwargs["eight_continual_move"] = env_globals.get("eight_continual_move", False) # If overriding the environment for specific Continual Learning tasks if sum([load_args.simple_continual, load_args.circular_continual, load_args.square_continual]) >= 1: env_kwargs["simple_continual_target"] = load_args.simple_continual env_kwargs["circular_continual_move"] = load_args.circular_continual env_kwargs["square_continual_move"] = load_args.square_continual env_kwargs["random_target"] = not (load_args.circular_continual or load_args.square_continual) srl_model_path = None if train_args["srl_model"] != "raw_pixels": train_args["policy"] = "mlp" path = env_globals.get('srl_model_path') if path is not None: env_kwargs["use_srl"] = True # Check that the srl saved model exists on the disk assert os.path.isfile(env_globals['srl_model_path']), \ "{} does not exist".format(env_globals['srl_model_path']) srl_model_path = env_globals['srl_model_path'] env_kwargs["srl_model_path"] = srl_model_path return train_args, load_path, algo_name, algo_class, srl_model_path, env_kwargs
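# A minimal sketch of the algo-name lookup performed at the top of loadConfigAndSetup(),
# assuming a log path that embeds the algorithm name the way the train script builds it
# (the path below is illustrative):
example_log_dir = "logs/OmnirobotEnv-v0/ground_truth/ppo2/19-01-01_12h00_00/"
example_algo_name = next((algo for algo in registered_rl if algo in example_log_dir), "")
# example_algo_name would be "ppo2" here, provided no other registered name matches first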
def main(): load_args = parseArguments() train_args, load_path, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(load_args) log_dir, envs, algo_args = createEnv(load_args, train_args, algo_name, algo_class, env_kwargs) assert (not load_args.plotting and not load_args.action_proba)\ or load_args.num_cpu == 1, "Error: cannot run plotting with more than 1 CPU" tf.reset_default_graph() set_global_seeds(load_args.seed) # createTensorflowSession() printYellow("Compiling Policy function....") printYellow(load_path) method = algo_class.load(load_path, args=algo_args) dones = [False for _ in range(load_args.num_cpu)] # HACK: check for custom vec env by checking if the last wrapper is WrapFrameStack # this is used for detecting algorithms that have a similar wrapping to deepq # is considered a hack because we are unable to detect if this wrapper was added earlier to the environment object using_custom_vec_env = isinstance(envs, WrapFrameStack) obs = envs.reset() if using_custom_vec_env: obs = obs.reshape((1,) + obs.shape) # plotting init if load_args.plotting: plt.pause(0.1) fig = plt.figure() old_obs = [] if registered_env[train_args["env"]][2] == PlottingType.PLOT_3D: ax = fig.add_subplot(111, projection='3d') line, = ax.plot([], [], [], c=[1, 0, 0, 1], label="episode 0") point = ax.scatter([0], [0], [0], c=[1, 0, 0, 1]) min_zone = [+np.inf, +np.inf, +np.inf] max_zone = [-np.inf, -np.inf, -np.inf] amplitude = [0, 0, 0] min_state_dim = 3 else: ax = fig.add_subplot(111) line, = ax.plot([], [], c=[1, 0, 0, 1], label="episode 0") point = ax.scatter([0], [0], c=[1, 0, 0, 1]) min_zone = [+np.inf, +np.inf] max_zone = [-np.inf, -np.inf] amplitude = [0, 0] min_state_dim = 2 fig.legend() if train_args["srl_model"] in ["ground_truth", "supervised"]: delta_obs = [envs.get_original_obs()[0]] else: # we need to rebuild the PCA representation, in order to visualize correctly in 3D # load the saved representations path = "/".join(srl_model_path.split("/")[:-1]) + "/image_to_state.json" X = np.array(list(json.load(open(path, 'r')).values())) X = fixStateDim(X, min_state_dim=min_state_dim) # estimate the PCA if registered_env[train_args["env"]][2] == PlottingType.PLOT_3D: pca = PCA(n_components=3) else: pca = PCA(n_components=2) pca.fit(X) delta_obs = [pca.transform(fixStateDim([obs[0]], min_state_dim=min_state_dim))[0]] plt.pause(0.00001) # check if the algorithm has a defined getActionProba function before allowing action_proba plotting if load_args.action_proba: if not hasattr(method, "getActionProba"): printYellow("Warning: requested flag --action-proba, " "but the algorithm {} does not implement 'getActionProba'".format(algo_name)) else: fig_prob = plt.figure() ax_prob = fig_prob.add_subplot(111) old_obs = [] if train_args["continuous_actions"]: ax_prob.set_ylim(np.min(envs.action_space.low), np.max(envs.action_space.high)) bar = ax_prob.bar(np.arange(np.prod(envs.action_space.shape)), np.array([0] * np.prod(envs.action_space.shape)), color=plt.get_cmap('viridis')(int(1 / np.prod(envs.action_space.shape) * 255))) else: ax_prob.set_ylim(0, 1) bar = ax_prob.bar(np.arange(envs.action_space.n), np.array([0] * envs.action_space.n), color=plt.get_cmap('viridis')(int(1 / envs.action_space.n * 255))) plt.pause(1) background_prob = fig_prob.canvas.copy_from_bbox(ax_prob.bbox) n_done = 0 last_n_done = 0 episode = 0 for i in range(load_args.num_timesteps): actions = method.getAction(obs, dones) obs, rewards, dones, _ = envs.step(actions) if using_custom_vec_env: obs = obs.reshape((1,) + obs.shape) # plotting 
if load_args.plotting: if train_args["srl_model"] in ["ground_truth", "supervised"]: adjusted_obs = envs.get_original_obs()[0] else: adjusted_obs = pca.transform(fixStateDim([obs[0]], min_state_dim=min_state_dim))[0] # create a new line, if the episode is finished if np.sum(dones) > 0: old_obs.append(np.array(delta_obs)) line.set_c(sns.color_palette()[episode % len(sns.color_palette())]) episode += 1 if registered_env[train_args["env"]][2] == PlottingType.PLOT_3D: line, = ax.plot([], [], [], c=[1, 0, 0, 1], label="episode " + str(episode)) else: line, = ax.plot([], [], c=[1, 0, 0, 1], label="episode " + str(episode)) fig.legend() delta_obs = [adjusted_obs] else: delta_obs.append(adjusted_obs) coor_plt = fixStateDim(np.array(delta_obs), min_state_dim=min_state_dim)[1:] unstack_val = coor_plt.shape[1] // train_args.get("num_stack", 1) coor_plt = coor_plt[:, -unstack_val:] # updating the 3d vertices for the line and the dot drawing, to avoid redrawing the entire image if registered_env[train_args["env"]][2] == PlottingType.PLOT_3D: line._verts3d = (coor_plt[:, 0], coor_plt[:, 1], coor_plt[:, 2]) point._offsets3d = (coor_plt[-1:, 0], coor_plt[-1:, 1], coor_plt[-1:, 2]) if coor_plt.shape[0] > 0: min_zone = np.minimum(np.amin(coor_plt, axis=0), min_zone) max_zone = np.maximum(np.amax(coor_plt, axis=0), max_zone) amplitude = max_zone - min_zone + 1e-10 ax.set_xlim(min_zone[0] - abs(amplitude[0] * 0.2), max_zone[0] + abs(amplitude[0] * 0.2)) ax.set_ylim(min_zone[1] - abs(amplitude[1] * 0.2), max_zone[1] + abs(amplitude[1] * 0.2)) ax.set_zlim(min_zone[2] - abs(amplitude[2] * 0.2), max_zone[2] + abs(amplitude[2] * 0.2)) else: line.set_xdata(coor_plt[:, 0]) line.set_ydata(coor_plt[:, 1]) point._offsets = coor_plt[-1:, :] if coor_plt.shape[0] > 0: min_zone = np.minimum(np.amin(coor_plt, axis=0), min_zone) max_zone = np.maximum(np.amax(coor_plt, axis=0), max_zone) amplitude = max_zone - min_zone + 1e-10 ax.set_xlim(min_zone[0] - abs(amplitude[0] * 0.2), max_zone[0] + abs(amplitude[0] * 0.2)) ax.set_ylim(min_zone[1] - abs(amplitude[1] * 0.2), max_zone[1] + abs(amplitude[1] * 0.2)) # Draw every 5 frames to avoid UI freezing if i % 5 == 0: fig.canvas.draw() plt.pause(0.000001) if load_args.action_proba and hasattr(method, "getActionProba"): # When continuous actions are needed, we cannot plot the action probability of every action # in the action space, so we show the action directly instead if train_args["continuous_actions"]: pi = method.getAction(obs, dones) else: pi = method.getActionProba(obs, dones) fig_prob.canvas.restore_region(background_prob) for act, rect in enumerate(bar): if train_args["continuous_actions"]: rect.set_height(pi[0][act]) color_val = np.abs(pi[0][act]) / max(np.max(envs.action_space.high), np.max(np.abs(envs.action_space.low))) else: rect.set_height(softmax(pi[0])[act]) color_val = softmax(pi[0])[act] rect.set_color(plt.get_cmap('viridis')(int(color_val * 255))) ax_prob.draw_artist(rect) fig_prob.canvas.blit(ax_prob.bbox) if using_custom_vec_env: if dones: obs = envs.reset() obs = obs.reshape((1,) + obs.shape) n_done += np.sum(dones) if (n_done - last_n_done) > 1: last_n_done = n_done _, mean_reward = computeMeanReward(log_dir, n_done) print("{} episodes - Mean reward: {:.2f}".format(n_done, mean_reward)) # Final summary after the replay loop _, mean_reward = computeMeanReward(log_dir, n_done) print("{} episodes - Mean reward: {:.2f}".format(n_done, mean_reward))
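# The action-probability plot above calls softmax() on the raw policy scores; a numerically
# stable version consistent with that use might look like this (a sketch, assuming the
# helper normalises a 1-D score vector):
import numpy as np

def softmax(x):
    # subtract the max before exponentiating to avoid overflow
    e = np.exp(x - np.max(x))
    return e / np.sum(e)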
def plotGatheredExperiments(folders, algo, y_limits, window=40, title="", min_num_x=-1, timesteps=False, output_file="", no_display=False): """ Compute the mean and standard error over several experiments and plot the learning curve :param folders: ([str]) Log folders, where the monitor.csv files are stored :param algo: (str) name of the RL algo :param y_limits: ([float]) y-limits for the plot :param window: (int) Smoothing window :param title: (str) plot title :param min_num_x: (int) Minimum number of episodes/timesteps to keep an experiment (default: -1, no minimum) :param timesteps: (bool) Plot timesteps instead of episodes :param output_file: (str) Path to a file where the plot data will be saved :param no_display: (bool) If set to True, the plot won't be displayed (useful when only saving the plot) """ y_list = [] x_list = [] ok = False for folder in folders: if timesteps: x, y = loadData(folder, smooth=1, bin_size=100) if x is not None: x, y = np.array(x), np.array(y) else: x, y = loadEpisodesData(folder) if x is None or (min_num_x > 0 and y.shape[0] < min_num_x): printYellow("Skipping {}".format(folder)) continue if y.shape[0] <= window: printYellow("Folder {}".format(folder)) printYellow("Not enough episodes for the current window size = {}".format(window)) continue ok = True y = movingAverage(y, window) y_list.append(y) # Truncate x x = x[len(x) - len(y):] x_list.append(x) if not ok: printRed("Not enough data to plot anything with the current config." + " Consider decreasing --min-x") return lengths = list(map(len, x_list)) min_x, max_x = np.min(lengths), np.max(lengths) print("Min x: {}".format(min_x)) print("Max x: {}".format(max_x)) for i in range(len(x_list)): x_list[i] = x_list[i][:min_x] y_list[i] = y_list[i][:min_x] x = np.array(x_list)[0] y = np.array(y_list) printGreen("{} Experiments".format(y.shape[0])) print("Min, Max rewards:", np.min(y), np.max(y)) fig = plt.figure(title) # Compute mean for different seeds m = np.mean(y, axis=0) # Compute standard error s = np.squeeze(np.asarray(np.std(y, axis=0))) n = y.shape[0] plt.fill_between(x, m - s / np.sqrt(n), m + s / np.sqrt(n), color=lightcolors[0]) plt.plot(x, m, color=darkcolors[0], label=algo, linewidth=1) if timesteps: formatter = FuncFormatter(millions) plt.xlabel('Number of Timesteps') fig.axes[0].xaxis.set_major_formatter(formatter) else: plt.xlabel('Number of Episodes') plt.ylabel('Rewards') plt.title(title, **fontstyle) plt.ylim(y_limits) plt.legend(framealpha=0.5, labelspacing=0.01, loc='lower right', fontsize=16) if output_file != "": printGreen("Saving aggregated data to {}.npz".format(output_file)) np.savez(output_file, x=x, y=y) if not no_display: plt.show()
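# plotGatheredExperiments() relies on a movingAverage() helper; a minimal sketch using a
# flat convolution window is shown below (assumption: 'valid' mode, so the output is
# shorter than the input, which is why x is truncated to len(y) above):
import numpy as np

def movingAverage(values, window):
    weights = np.repeat(1.0, window) / window
    return np.convolve(values, weights, 'valid')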
def main(): parser = argparse.ArgumentParser(description='Deterministic dataset generator for SRL training ' + '(can be used for environment testing)') parser.add_argument('--num-cpu', type=int, default=1, help='number of CPUs to run on') parser.add_argument('--num-episode', type=int, default=50, help='number of episodes to run') parser.add_argument('--max_steps_per_epoch', type=int, default=200, help='max number of steps per epoch') # CUSTOM ARGS: to be updated eventually, e.g., to specify a specific path for domain randomization parser.add_argument('--dr', action='store_true', default=False, help="Include this flag to use the chosen environment with domain randomization") parser.add_argument('--alt', action='store_true', default=False, help="Include this flag to use the chosen environment with an alternate view") parser.add_argument('--special_start', action='store_true', default=False, help="Include this flag to use the chosen environment with the special start") parser.add_argument('--save-path', type=str, default='robotics-rl-srl/data/', help='Folder where the environments will save the output') parser.add_argument('--name', type=str, default='UNSETNAME', help='Folder name for the output') parser.add_argument('--env', type=str, default='push_rotate', help='The environment to use', choices=list(envs.keys())) parser.add_argument('--display', action='store_true', default=False) parser.add_argument('--no-record-data', action='store_true', default=False) parser.add_argument('--seed', type=int, default=0, help='the seed') parser.add_argument('-f', '--force', action='store_true', default=False, help='Force the save, even if it overrides something else,' + ' including partial parts if they exist') # TODO: Change this argument to be for the diff types of tasks parser.add_argument('--multi-view', action='store_true', default=False, help='Set a second camera to the scene') parser.add_argument('--reward-dist', action='store_true', default=False, help='Prints out the reward distribution when the dataset generation is finished') parser.add_argument('--run-ppo2', action='store_true', default=False, help='runs a PPO2 agent instead of a random agent') parser.add_argument('--ppo2-timesteps', type=int, default=1000, help='number of timesteps to run PPO2 on before generating the dataset') args = parser.parse_args() assert (args.num_cpu > 0), "Error: number of CPUs must be positive and non-zero" assert (args.num_episode > 0), "Error: number of episodes must be positive and non-zero" # assert not(registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \ # "Error: cannot have more than 1 CPU for the environment {}".format(args.env) if args.num_cpu > args.num_episode: args.num_cpu = args.num_episode printYellow("num_cpu cannot be greater than num_episode, defaulting to {} CPUs.".format(args.num_cpu)) # this is done so seed 0 and 1 are different and not simply offsets of the same datasets. 
args.seed = np.random.RandomState(args.seed).randint(int(1e10)) # File exists, need to deal with it if not args.no_record_data and os.path.exists(args.save_path + args.name): assert args.force, "Error: save directory '{}' already exists".format(args.save_path + args.name) shutil.rmtree(args.save_path + args.name) for part in glob.glob(args.save_path + args.name + "_part-[0-9]*"): shutil.rmtree(part) if not args.no_record_data: # create the output folder os.makedirs(args.save_path + args.name, exist_ok=True) if args.num_cpu == 1: env_thread(args, 0, partition=False, use_ppo2=args.run_ppo2) else: # try and divide into multiple processes, with an environment each try: jobs = [] for i in range(args.num_cpu): process = multiprocessing.Process(target=env_thread, args=(args, i, True, args.run_ppo2)) jobs.append(process) for j in jobs: j.start() try: for j in jobs: j.join() except Exception as e: printRed("Error: unable to join thread") raise e except Exception as e: printRed("Error: unable to start thread") raise e if not args.no_record_data and args.num_cpu > 1: # sleep 1 second to avoid concurrency issues from multiprocessing (e.g., files still being written) time.sleep(1) # get all the parts file_parts = sorted(glob.glob(args.save_path + args.name + "_part-[0-9]*"), key=lambda a: int(a.split("-")[-1])) # move the config files from the first part, as they are identical across parts os.rename(file_parts[0] + "/dataset_config.json", args.save_path + args.name + "/dataset_config.json") os.rename(file_parts[0] + "/env_globals.json", args.save_path + args.name + "/env_globals.json") ground_truth = None preprocessed_data = None # used to convert the part record_id to the fused record_id record_id = 0 for part in file_parts: # sort the record names alphabetically, then numerically records = sorted(glob.glob(part + "/record_[0-9]*"), key=lambda a: int(a.split("_")[-1])) record_id_start = record_id for record in records: os.renames(record, args.save_path + args.name + "/record_{:03d}".format(record_id)) record_id += 1 # fuse the npz files together, in the right order if ground_truth is None: # init ground_truth = {} preprocessed_data = {} ground_truth_load = np.load(part + "/ground_truth.npz") preprocessed_data_load = np.load(part + "/preprocessed_data.npz") for arr in ground_truth_load.files: if arr == "images_path": ground_truth[arr] = np.array([convertImagePath(args, path, record_id_start) for path in ground_truth_load[arr]]) else: ground_truth[arr] = ground_truth_load[arr] for arr in preprocessed_data_load.files: preprocessed_data[arr] = preprocessed_data_load[arr] else: ground_truth_load = np.load(part + "/ground_truth.npz") preprocessed_data_load = np.load(part + "/preprocessed_data.npz") for arr in ground_truth_load.files: if arr == "images_path": sanitised_paths = np.array([convertImagePath(args, path, record_id_start) for path in ground_truth_load[arr]]) ground_truth[arr] = np.concatenate((ground_truth[arr], sanitised_paths)) else: ground_truth[arr] = np.concatenate((ground_truth[arr], ground_truth_load[arr])) for arr in preprocessed_data_load.files: preprocessed_data[arr] = np.concatenate((preprocessed_data[arr], preprocessed_data_load[arr])) # remove the current part folder shutil.rmtree(part) # save the fused outputs np.savez(args.save_path + args.name + "/ground_truth.npz", **ground_truth) np.savez(args.save_path + args.name + "/preprocessed_data.npz", **preprocessed_data) if args.reward_dist: rewards, counts = np.unique(np.load(args.save_path + args.name + "/preprocessed_data.npz")['rewards'], return_counts=True) counts = ["{:.2f}%".format(val * 100) for val in counts / np.sum(counts)] print("reward distribution:") for reward, count in zip(rewards, counts): print(" ", reward, count)
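# A small worked example of the reward-distribution computation above (the data is
# illustrative, standing in for the 'rewards' array of preprocessed_data.npz):
import numpy as np

example_rewards = np.array([0, 0, 0, 1, 0, -1, 0, 1])
values, counts = np.unique(example_rewards, return_counts=True)
shares = ["{:.2f}%".format(val * 100) for val in counts / np.sum(counts)]
# values -> [-1, 0, 1]; shares -> ['12.50%', '62.50%', '25.00%']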