Example #1
    def train(self, args, callback, env_kwargs=None, train_kwargs=None):
        if train_kwargs is None:
            train_kwargs = {}

        if args.srl_model == "raw_pixels":
            printYellow(
                "Warning: ACKTR can have memory issues when running with raw_pixels"
            )

        param_kwargs = {
            "verbose": 1,
            "n_steps": 5,
            "vf_coef": 0.5,
            "ent_coef": 0.01,
            "max_grad_norm": 0.5,
            "learning_rate": 7e-4,
            "vf_fisher_coef": 1.0,
            "gamma": 0.99,
            "lr_schedule": args.lr_schedule
        }

        super().train(args, callback, env_kwargs, {
            **param_kwargs,
            **train_kwargs
        })
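A note on the merged dictionary passed to super().train() above: because train_kwargs is unpacked after param_kwargs, user-supplied hyperparameters override the ACKTR defaults. A minimal, standalone sketch of that merge semantics (values below are illustrative only):

defaults = {"n_steps": 5, "gamma": 0.99, "learning_rate": 7e-4}
overrides = {"gamma": 0.95}

merged = {**defaults, **overrides}
# keys from the right-most dict win, so the user-supplied value takes effect
assert merged["gamma"] == 0.95
assert merged["n_steps"] == 5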
Example #2
def allPolicyFiles(log_dir):
    """

    :param log_dir:
    :return:
    """
    train_args, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(
        log_dir)
    files = glob.glob(os.path.join(log_dir + algo_name + '_*_model.pkl'))
    printYellow(log_dir)
    files = glob.glob(log_dir + '/model_*')

    files_list = []
    for file in files:
        eps = int((file.split('_')[-1]))
        files_list.append((eps, file + '/'))

    def sortFirst(val):
        """

        :param val:
        :return:
        """
        return val[0]

    files_list.sort(key=sortFirst)
    res = np.array(files_list)
    return res[:, 0], res[:, 1]
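A hedged usage sketch for allPolicyFiles, assuming a log folder containing model_<episode> subfolders as the glob above expects (the path is hypothetical):

episodes, model_folders = allPolicyFiles("logs/OmnirobotEnv-v0/ground_truth/ppo2/24-01-01_12h00_00")
for ep, folder in zip(episodes, model_folders):
    # episodes are returned in ascending order; each folder path ends with a trailing '/'
    print("episode {:>6} -> {}".format(ep, folder))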
Example #3
def configureEnvAndLogFolder(args, env_kwargs, all_models):
    """
    :param args: (ArgumentParser object)
    :param env_kwargs: (dict) The extra arguments for the environment
    :param all_models: (dict) The location of all the trained SRL models
    :return: (ArgumentParser object, dict)
    """
    global PLOT_TITLE, LOG_DIR
    # Reward sparse or shaped
    env_kwargs["shape_reward"] = args.shape_reward
    # Actions in joint space or relative position space
    env_kwargs["action_joints"] = args.action_joints
    args.log_dir += args.env + "/"

    models = all_models[
        args.env]  # models: dict (from the config file) mapping srl_model names to their paths
    PLOT_TITLE = args.srl_model
    path = models.get(args.srl_model)
    args.log_dir += args.srl_model + "/"

    env_kwargs["srl_model"] = args.srl_model
    if registered_srl[args.srl_model][0] == SRLType.SRL:
        env_kwargs["use_srl"] = True
        if args.latest:
            printYellow("Using latest srl model in {}".format(
                models['log_folder']))
            env_kwargs["srl_model_path"] = latestPath(models['log_folder'])
        else:
            assert path is not None, "Error: SRL path not defined for {} in {}".format(
                args.srl_model, args.srl_config_file)
            # Path depending on whether to load the latest model or not
            if args.srl_model_path is not None:  # HACK: allows overriding the SRL model weights path from the command line
                assert os.path.exists(
                    args.srl_model_path
                ), "SRL model weights: {} doesn't exist.".format(
                    args.srl_model_path)
                env_kwargs["srl_model_path"] = args.srl_model_path
            else:
                srl_model_path = models['log_folder'] + path
                env_kwargs["srl_model_path"] = srl_model_path
    # Use of continual learning env
    env_kwargs["simple_continual_target"] = args.simple_continual
    env_kwargs["circular_continual_move"] = args.circular_continual
    env_kwargs["square_continual_move"] = args.square_continual
    env_kwargs["eight_continual_move"] = args.eight_continual

    # Add date + current time
    args.log_dir += "{}/{}/".format(
        ALGO_NAME,
        datetime.now().strftime("%y-%m-%d_%Hh%M_%S"))
    LOG_DIR = args.log_dir
    # wait one second if the folder exists to avoid overwriting logs
    time.sleep(1)
    os.makedirs(args.log_dir, exist_ok=True)

    return args, env_kwargs
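For reference, the log directory built above nests the environment, the SRL model, the algorithm and a timestamp. A small sketch reproducing that layout with hypothetical values:

from datetime import datetime

log_dir = "/tmp/gym/"               # args.log_dir
log_dir += "OmnirobotEnv-v0" + "/"  # args.env
log_dir += "ground_truth" + "/"     # args.srl_model
log_dir += "{}/{}/".format("ppo2", datetime.now().strftime("%y-%m-%d_%Hh%M_%S"))
print(log_dir)  # e.g. /tmp/gym/OmnirobotEnv-v0/ground_truth/ppo2/24-01-01_12h00_00/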
Example #4
def loadRunningAverage(envs, load_path_normalise=None):
    if load_path_normalise is not None:
        try:
            printGreen("Loading saved running average")
            envs.load_running_average(load_path_normalise)
            envs.training = False
        except FileNotFoundError:
            envs.training = True
            printYellow("Running Average files not found for VecNormalize, switching to training mode")
    return envs
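A usage sketch for loadRunningAverage, assuming the repo utilities are importable; CartPole and the normalisation path below are stand-ins, the real code wraps the project's own environments (see createEnvs further down):

import gym
from stable_baselines.common.vec_env import DummyVecEnv, VecNormalize

envs = DummyVecEnv([lambda: gym.make("CartPole-v1")])
envs = VecNormalize(envs, norm_obs=True, norm_reward=False)

# with a valid path the saved statistics are restored and the running average is frozen;
# with a missing path the FileNotFoundError branch switches back to training mode
envs = loadRunningAverage(envs, load_path_normalise="logs/previous_run/")
print(envs.training)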
Example #5
    def __init__(self,
                 name,
                 max_dist,
                 state_dim=-1,
                 globals_=None,
                 learn_every=3,
                 learn_states=False,
                 path='data/',
                 relative_pos=False):
        super(EpisodeSaver, self).__init__()
        self.name = name
        self.data_folder = path + name
        self.path = path
        try:
            os.makedirs(self.data_folder)
        except OSError:
            printYellow("Folder already exist")

        self.actions = []
        self.actions_proba = []
        self.rewards = []
        self.images = []
        self.target_positions = []
        self.episode_starts = []
        self.ground_truth_states = []
        self.images_path = []
        self.episode_step = 0
        self.episode_idx = -1
        self.episode_folder = None
        self.episode_success = False
        self.state_dim = state_dim
        self.learn_states = learn_states
        self.learn_every = learn_every  # Every n episodes, learn a state representation
        self.srl_model_path = ""
        self.n_steps = 0
        self.max_steps = 10000

        self.dataset_config = {
            'relative_pos': relative_pos,
            'max_dist': str(max_dist)
        }
        with open("{}/dataset_config.json".format(self.data_folder), "w") as f:
            json.dump(self.dataset_config, f)

        if globals_ is not None:
            # Save environments parameters
            with open("{}/env_globals.json".format(self.data_folder),
                      "w") as f:
                json.dump(filterJSONSerializableObjects(globals_), f)

        if self.learn_states:
            self.socket_client = SRLClient(self.name)
            self.socket_client.waitForServer()
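A hedged instantiation sketch for the saver above; the name, distance and dimensions are illustrative only:

saver = EpisodeSaver(name="kuka_button_example",
                     max_dist=0.28,
                     state_dim=3,
                     globals_=None,        # pass the env module's getGlobals() to also dump env_globals.json
                     learn_states=False,   # keep False unless an SRL server is running
                     path="data/",
                     relative_pos=False)
# at this point data/kuka_button_example/dataset_config.json has been written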
Example #6
    def __init__(self, log_folder):
        super(LogRLStates, self).__init__()

        self.log_folder = log_folder + 'log_srl/'
        try:
            os.makedirs(self.log_folder)
        except OSError:
            printYellow("Folder already exist")

        self.actions = []
        self.rewards = []
        self.states = []
        self.normalized_states = []
Example #7
    def makeEnv(cls, args, env_kwargs=None, load_path_normalise=None):
        # Even though DeepQ is single-core only, we still need to use the pipe system for the SRL model to work
        if env_kwargs is not None and env_kwargs.get("use_srl", False):
            srl_model = MultiprocessSRLModel(1, args.env, env_kwargs)
            env_kwargs["state_dim"] = srl_model.state_dim
            env_kwargs["srl_pipe"] = srl_model.pipe

        envs = DummyVecEnv([makeEnv(args.env, args.seed, 0, args.log_dir, env_kwargs=env_kwargs)])
        envs = VecFrameStack(envs, args.num_stack)

        if args.srl_model != "raw_pixels":
            printYellow("Using MLP policy because working on state representation")
            envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
            envs = loadRunningAverage(envs, load_path_normalise=load_path_normalise)

        return envs
Example #8
def createEnvs(args,
               allow_early_resets=False,
               env_kwargs=None,
               load_path_normalise=None):
    """
    :param args: (argparse.Namespace Object)
    :param allow_early_resets: (bool) Allow reset before the environment is done, usually used in ES to halt the envs
    :param env_kwargs: (dict) The extra arguments for the environment
    :param load_path_normalise: (str) the path to load the rolling average from, None if not available or not wanted
    :return: (Gym VecEnv)
    """
    # imported here to prevent cyclic imports
    from environments.registry import registered_env
    from state_representation.registry import registered_srl, SRLType

    assert not (registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if env_kwargs is not None and registered_srl[
            args.srl_model][0] == SRLType.SRL:
        srl_model = MultiprocessSRLModel(args.num_cpu, args.env, env_kwargs)
        env_kwargs["state_dim"] = srl_model.state_dim
        env_kwargs["srl_pipe"] = srl_model.pipe
    envs = [
        makeEnv(args.env,
                args.seed,
                i,
                args.log_dir,
                allow_early_resets=allow_early_resets,
                env_kwargs=env_kwargs) for i in range(args.num_cpu)
    ]

    if len(envs) == 1:
        # No need for subprocesses when having only one env
        envs = DummyVecEnv(envs)
    else:
        envs = SubprocVecEnv(envs)

    envs = VecFrameStack(envs, args.num_stack)

    if args.srl_model != "raw_pixels":
        printYellow("Using MLP policy because working on state representation")
        envs = VecNormalize(envs, norm_obs=True, norm_reward=False)
        envs = loadRunningAverage(envs,
                                  load_path_normalise=load_path_normalise)

    return envs
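A usage sketch for createEnvs, assuming an args namespace that carries the fields read above (env, seed, log_dir, num_cpu, num_stack, srl_model); all values are placeholders:

import argparse

args = argparse.Namespace(env="KukaButtonGymEnv-v0",
                          seed=0,
                          log_dir="/tmp/gym/example/",
                          num_cpu=4,
                          num_stack=1,
                          srl_model="raw_pixels")
env_kwargs = {"is_discrete": True}

envs = createEnvs(args, allow_early_resets=False, env_kwargs=env_kwargs)
obs = envs.reset()
print(obs.shape)  # one stacked observation per worker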
Example #9
    def __init__(self, name, env_name=None, path='data/'):
        super(EpisodeSaver, self).__init__()
        self.name = name
        self.data_folder = path + name
        self.path = path
        try:
            os.makedirs(self.data_folder)
        except OSError:
            printYellow("Folder already exist")

        self.actions = []
        self.rewards = []
        self.images = []
        self.episode_starts = []
        self.ground_truth_states = []
        self.images_path = []
        self.episode_step = 0
        self.episode_idx = -1
        self.episode_folder = None
        self.episode_success = False
        self.n_steps = 0

        self.env_name = env_name
Example #10
    # TODO: check that the parameters are the same between Experiments
    folders = []
    other = []
    train_args = {}
    for folder in os.listdir(args.log_dir):
        path = "{}/{}/".format(args.log_dir, folder)
        env_globals = json.load(open(path + "env_globals.json", 'r'))
        train_args = json.load(open(path + "args.json", 'r'))
        if train_args["shape_reward"] == args.shape_reward:
            folders.append(path)
        else:
            other.append(path)

    if len(folders) == 0 and len(other) == 0:
        printYellow(
            "No experiment found. Is the folder path {} correct?".format(
                args.log_dir))
        exit()
    elif len(folders) == 0:
        printYellow(
            "No experiments found with the given criterion. However, {} experiments"
            .format(len(other)) + " were found {} reward shaping. ".format(
                "without" if args.shape_reward else "with") +
            "Did you mean {} the flag '--shape-reward'?".format(
                "without" if args.shape_reward else "with"))
        exit()

    srl_model = train_args[
        'srl_model'] if train_args['srl_model'] != "" else "raw pixels"
    if args.timesteps:
        title = srl_model + " [Timesteps]"
Example #11
def main():
    # Global variables for callback
    global ENV_NAME, ALGO, ALGO_NAME, LOG_INTERVAL, VISDOM_PORT, viz
    global SAVE_INTERVAL, EPISODE_WINDOW, MIN_EPISODES_BEFORE_SAVE
    parser = argparse.ArgumentParser(
        description="Train script for RL algorithms")
    parser.add_argument('--algo',
                        default='ppo2',
                        choices=list(registered_rl.keys()),
                        help='RL algo to use',
                        type=str)
    parser.add_argument('--env',
                        type=str,
                        help='environment ID',
                        default='KukaButtonGymEnv-v0',
                        choices=list(registered_env.keys()))
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='random seed (default: 0)')
    parser.add_argument(
        '--episode-window',
        type=int,
        default=40,
        help='Episode window for moving average plot (default: 40)')
    parser.add_argument(
        '--log-dir',
        default='/tmp/gym/',
        type=str,
        help='directory to save agent logs and model (default: /tmp/gym)')
    parser.add_argument('--num-timesteps', type=int, default=int(1e6))
    parser.add_argument('--srl-model',
                        type=str,
                        default='raw_pixels',
                        choices=list(registered_srl.keys()),
                        help='SRL model to use')
    parser.add_argument('--num-stack',
                        type=int,
                        default=1,
                        help='number of frames to stack (default: 1)')
    parser.add_argument(
        '--action-repeat',
        type=int,
        default=1,
        help='number of times an action will be repeated (default: 1)')
    parser.add_argument('--port',
                        type=int,
                        default=8097,
                        help='visdom server port (default: 8097)')
    parser.add_argument('--no-vis',
                        action='store_true',
                        default=False,
                        help='disables visdom visualization')
    parser.add_argument(
        '--shape-reward',
        action='store_true',
        default=False,
        help='Shape the reward (reward = - distance) instead of a sparse reward'
    )
    parser.add_argument('-c',
                        '--continuous-actions',
                        action='store_true',
                        default=False)
    parser.add_argument(
        '-joints',
        '--action-joints',
        action='store_true',
        default=False,
        help=
        'set actions to the joints of the arm directly, instead of inverse kinematics'
    )
    parser.add_argument('-r',
                        '--random-target',
                        action='store_true',
                        default=False,
                        help='Set the button to a random position')
    parser.add_argument(
        '--srl-config-file',
        type=str,
        default="config/srl_models.yaml",
        help='Set the location of the SRL model path configuration.')
    parser.add_argument('--hyperparam', type=str, nargs='+', default=[])
    parser.add_argument('--min-episodes-save',
                        type=int,
                        default=100,
                        help="Min number of episodes before saving best model")
    parser.add_argument(
        '--latest',
        action='store_true',
        default=False,
        help=
        'load the latest learned model (location: srl_zoo/logs/DatasetName/)')
    parser.add_argument(
        '--load-rl-model-path',
        type=str,
        default=None,
        help="load the trained RL model, should be with the same algorithm type"
    )
    parser.add_argument(
        '-sc',
        '--simple-continual',
        action='store_true',
        default=False,
        help=
        'Simple red square target for task 1 of continual learning scenario. '
        + 'The task is: robot should reach the target.')
    parser.add_argument(
        '-cc',
        '--circular-continual',
        action='store_true',
        default=False,
        help='Blue square target for task 2 of continual learning scenario. ' +
        'The task is: robot should turn in circle around the target.')
    parser.add_argument(
        '-sqc',
        '--square-continual',
        action='store_true',
        default=False,
        help='Green square target for task 3 of continual learning scenario. '
        + 'The task is: robot should turn in square around the target.')
    parser.add_argument(
        '-ec',
        '--eight-continual',
        action='store_true',
        default=False,
        help='Green square target for task 4 of continual learning scenario. '
        +
        'The task is: robot should do the figure eight with the target as the center of the shape.'
    )
    parser.add_argument('--teacher-data-folder',
                        type=str,
                        default="",
                        help='Dataset folder of the teacher(s) policy(ies)',
                        required=False)
    parser.add_argument(
        '--epochs-distillation',
        type=int,
        default=30,
        metavar='N',
        help='number of epochs to train for distillation (default: 30)')
    parser.add_argument(
        '--distillation-training-set-size',
        type=int,
        default=-1,
        help='Limit size (number of samples) of the training set (default: -1)'
    )
    parser.add_argument(
        '--perform-cross-evaluation-cc',
        action='store_true',
        default=False,
        help='A cross evaluation from the latest stored model to all tasks')
    parser.add_argument(
        '--eval-episode-window',
        type=int,
        default=400,
        metavar='N',
        help=
        'Episode window for saving each policy checkpoint for future distillation (default: 400)'
    )
    parser.add_argument(
        '--new-lr',
        type=float,
        default=1.e-4,
        help="New learning rate ratio to train a pretrained agent")
    parser.add_argument('--img-shape',
                        type=str,
                        default="(3,64,64)",
                        help="Image shape of environment.")
    parser.add_argument(
        "--gpu-num",
        help="Choose the number of GPU (CUDA_VISIBLE_DEVICES).",
        type=str,
        default="1",
        choices=["0", "1", "2", "3", "5", "6", "7", "8"])
    parser.add_argument("--srl-model-path",
                        help="SRL model weights path",
                        type=str,
                        default=None)
    parser.add_argument(
        "--relative-pos",
        action='store_true',
        default=False,
        help="For 'ground_truth': use relative position or not.")
    # Ignore unknown args for now
    args, unknown = parser.parse_known_args()
    # os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_num
    env_kwargs = {}
    if args.img_shape is None:
        img_shape = None  #(3,224,224)
    else:
        img_shape = tuple(map(int, args.img_shape[1:-1].split(",")))
    env_kwargs['img_shape'] = img_shape
    # LOAD SRL models list
    assert os.path.exists(args.srl_config_file), \
        "Error: cannot load \"--srl-config-file {}\", file not found!".format(args.srl_config_file)
    with open(args.srl_config_file, 'rb') as f:
        all_models = yaml.load(f)
    # Sanity check
    assert args.episode_window >= 1, "Error: --episode_window cannot be less than 1"
    assert args.num_timesteps >= 1, "Error: --num-timesteps cannot be less than 1"
    assert args.num_stack >= 1, "Error: --num-stack cannot be less than 1"
    assert args.action_repeat >= 1, "Error: --action-repeat cannot be less than 1"
    assert 0 <= args.port < 65535, "Error: invalid visdom port number {}, ".format(args.port) + \
                                   "port number must be an unsigned 16bit number [0,65535]."
    assert registered_srl[args.srl_model][0] == SRLType.ENVIRONMENT or args.env in all_models, \
        "Error: the environment {} has no srl_model defined in 'srl_models.yaml'. Cannot continue.".format(args.env)
    # check that all the SRL_model can be run on the environment
    if registered_srl[args.srl_model][1] is not None:
        found = False
        for compatible_class in registered_srl[args.srl_model][1]:
            if issubclass(compatible_class, registered_env[args.env][0]):
                found = True
                break
        assert found, "Error: srl_model {}, is not compatible with the {} environment.".format(
            args.srl_model, args.env)

    assert not(sum([args.simple_continual, args.circular_continual, args.square_continual, args.eight_continual]) \
           > 1 and args.env == "OmnirobotEnv-v0"), \
        "For continual SRL and RL, please provide only one scenario at the time and use OmnirobotEnv-v0 environment !"

    assert not(args.algo == "distillation" and (args.teacher_data_folder == '' or args.continuous_actions is True)), \
        "For performing policy distillation, make sure use specify a valid teacher dataset and discrete actions !"

    ENV_NAME = args.env
    ALGO_NAME = args.algo
    VISDOM_PORT = args.port
    EPISODE_WINDOW = args.episode_window
    MIN_EPISODES_BEFORE_SAVE = args.min_episodes_save
    CROSS_EVAL = args.perform_cross_evaluation_cc
    EPISODE_WINDOW_DISTILLATION_WIN = args.eval_episode_window
    NEW_LR = args.new_lr
    print("EPISODE_WINDOW_DISTILLATION_WIN: ", EPISODE_WINDOW_DISTILLATION_WIN)

    if args.no_vis:
        viz = False

    algo_class, algo_type, action_type = registered_rl[args.algo]
    algo = algo_class()
    ALGO = algo

    # if callback frequency needs to be changed
    LOG_INTERVAL = algo.LOG_INTERVAL
    SAVE_INTERVAL = algo.SAVE_INTERVAL

    if not args.continuous_actions and ActionType.DISCRETE not in action_type:
        raise ValueError(
            args.algo +
            " does not support discrete actions, please use the '--continuous-actions' "
            + "(or '-c') flag.")
    if args.continuous_actions and ActionType.CONTINUOUS not in action_type:
        raise ValueError(
            args.algo +
            " does not support continuous actions, please remove the '--continuous-actions' "
            + "(or '-c') flag.")

    env_kwargs["is_discrete"] = not args.continuous_actions

    printGreen("\nAgent = {} \n".format(args.algo))

    env_kwargs["action_repeat"] = args.action_repeat
    # Random init position for button
    env_kwargs["random_target"] = args.random_target

    # If in simple continual scenario, then the target should be initialized randomly.
    if args.simple_continual is True:
        env_kwargs["random_target"] = True

    # Allow up action
    # env_kwargs["force_down"] = False

    # allow multi-view
    env_kwargs['multi_view'] = args.srl_model == "multi_view_srl"
    parser = algo.customArguments(parser)
    args = parser.parse_args()

    args, env_kwargs = configureEnvAndLogFolder(args, env_kwargs, all_models)
    args_dict = filterJSONSerializableObjects(vars(args))
    # Save args
    with open(LOG_DIR + "args.json", "w") as f:
        json.dump(args_dict, f)

    env_class = registered_env[args.env][0]
    # env default kwargs
    default_env_kwargs = {
        k: v.default
        for k, v in inspect.signature(env_class.__init__).parameters.items()
        if v is not None
    }

    globals_env_param = sys.modules[env_class.__module__].getGlobals()
    # HACK: override the image shape of the environment module
    globals_env_param['RENDER_HEIGHT'] = img_shape[1]
    globals_env_param['RENDER_WIDTH'] = img_shape[2]
    globals_env_param['RELATIVE_POS'] = args.relative_pos

    super_class = registered_env[args.env][1]
    # recursive search through all the super classes of the requested environment, in order to get all the arguments
    rec_super_class_lookup = {
        dict_class: dict_super_class
        for _, (dict_class, dict_super_class, _, _) in registered_env.items()
    }
    while super_class != SRLGymEnv:
        assert super_class in rec_super_class_lookup, "Error: could not find super class of {}".format(super_class) + \
                                                      ", are you sure \"registered_env\" is correctly defined?"
        super_env_kwargs = {
            k: v.default
            for k, v in inspect.signature(
                super_class.__init__).parameters.items() if v is not None
        }
        default_env_kwargs = {**super_env_kwargs, **default_env_kwargs}

        globals_env_param = {
            **sys.modules[super_class.__module__].getGlobals(),
            **globals_env_param
        }

        super_class = rec_super_class_lookup[super_class]

    # Print Variables
    printYellow("Arguments:")
    pprint(args_dict)
    printYellow("Env Globals:")
    pprint(
        filterJSONSerializableObjects({
            **globals_env_param,
            **default_env_kwargs,
            **env_kwargs
        }))
    # Save env params
    saveEnvParams(globals_env_param, {**default_env_kwargs, **env_kwargs})
    # Seed tensorflow, python and numpy random generator
    set_global_seeds(args.seed)
    # Augment the number of timesteps (when using multiprocessing this number is not reached)
    args.num_timesteps = int(1.1 * args.num_timesteps)
    # Get the hyperparameter, if given (Hyperband)
    hyperparams = {
        param.split(":")[0]: param.split(":")[1]
        for param in args.hyperparam
    }
    hyperparams = algo.parserHyperParam(hyperparams)

    if args.load_rl_model_path is not None:
        # use a small learning rate
        print("use a small learning rate: {:f}".format(1.0e-4))
        hyperparams["learning_rate"] = lambda f: f * 1.0e-4

    # Train the agent
    if args.load_rl_model_path is not None:
        algo.setLoadPath(args.load_rl_model_path)
    algo.train(args, callback, env_kwargs=env_kwargs, train_kwargs=hyperparams)
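Since the whole configuration is driven by argparse, the training entry point can also be exercised programmatically by filling sys.argv before calling main(); a hedged sketch with illustrative flags (script name and values are placeholders):

import sys

sys.argv = ["train.py",
            "--algo", "ppo2",
            "--env", "OmnirobotEnv-v0",
            "--srl-model", "ground_truth",
            "--num-timesteps", "200000",
            "--log-dir", "logs/example/",
            "--no-vis"]
main()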
Example #12
def main():
    parser = argparse.ArgumentParser(
        description='Deterministic dataset generator for SRL training ' +
        '(can be used for environment testing)')
    parser.add_argument('--num-cpu',
                        type=int,
                        default=1,
                        help='number of cpu to run on')
    parser.add_argument('--num-episode',
                        type=int,
                        default=50,
                        help='number of episode to run')
    parser.add_argument(
        '--save-path',
        type=str,
        default='srl_zoo/data/',
        help='Folder where the environments will save the output')
    parser.add_argument('--name',
                        type=str,
                        default='kuka_button',
                        help='Folder name for the output')
    parser.add_argument('--env',
                        type=str,
                        default='KukaButtonGymEnv-v0',
                        help='The environment wanted',
                        choices=list(registered_env.keys()))
    parser.add_argument('--display', action='store_true', default=False)
    parser.add_argument('--no-record-data', action='store_true', default=False)
    parser.add_argument(
        '--max-distance',
        type=float,
        default=0.28,
        help=
        'Beyond this distance from the goal, the agent gets a negative reward')
    parser.add_argument('-c',
                        '--continuous-actions',
                        action='store_true',
                        default=False)
    parser.add_argument('--seed', type=int, default=0, help='the seed')
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        default=False,
        help='Force the save, even if it overrides something else,' +
        ' including partial parts if they exist')
    parser.add_argument('-r',
                        '--random-target',
                        action='store_true',
                        default=False,
                        help='Set the button to a random position')
    parser.add_argument('--multi-view',
                        action='store_true',
                        default=False,
                        help='Set a second camera to the scene')
    parser.add_argument(
        '--shape-reward',
        action='store_true',
        default=False,
        help='Shape the reward (reward = - distance) instead of a sparse reward'
    )
    parser.add_argument(
        '--reward-dist',
        action='store_true',
        default=False,
        help=
        'Prints out the reward distribution when the dataset generation is finished'
    )
    parser.add_argument(
        '--run-policy',
        type=str,
        default="random",
        choices=VALID_POLICIES,
        help='Policy to run for data collection ' +
        '(random, locally pretrained ppo2, pretrained custom policy)')
    parser.add_argument(
        '--log-custom-policy',
        type=str,
        default='',
        help='Logs of the custom pretrained policy to run for data collection')
    parser.add_argument(
        '--latest',
        action='store_true',
        default=False,
        help='load the latest learned model (location: args.log-custom-policy)'
    )
    parser.add_argument(
        '-rgm',
        '--replay-generative-model',
        type=str,
        default="",
        choices=['vae'],
        help=
        'Generative model to replay for generating a dataset (for Continual Learning purposes)'
    )
    parser.add_argument(
        '--log-generative-model',
        type=str,
        default='',
        help='Logs of the pretrained generative model to replay for dataset generation')
    parser.add_argument(
        '--ppo2-timesteps',
        type=int,
        default=1000,
        help='number of timesteps to run PPO2 on before generating the dataset'
    )
    parser.add_argument(
        '--toward-target-timesteps-proportion',
        type=float,
        default=0.0,
        help=
        "proportion of timesteps that simply use the toward-target policy, should be between 0.0 and 1.0"
    )
    parser.add_argument(
        '-sc',
        '--simple-continual',
        action='store_true',
        default=False,
        help=
        'Simple red square target for task 1 of continual learning scenario. '
        + 'The task is: robot should reach the target.')
    parser.add_argument(
        '-cc',
        '--circular-continual',
        action='store_true',
        default=False,
        help='Blue square target for task 2 of continual learning scenario. ' +
        'The task is: robot should turn in circle around the target.')
    parser.add_argument(
        '-sqc',
        '--square-continual',
        action='store_true',
        default=False,
        help='Green square target for task 3 of continual learning scenario. '
        + 'The task is: robot should turn in square around the target.')
    parser.add_argument(
        '--short-episodes',
        action='store_true',
        default=False,
        help=
        'Generate short episodes (only 10 contacts with the target allowed).')
    parser.add_argument('--episode',
                        type=int,
                        default=-1,
                        help='Model saved at episode N that we want to load')

    args = parser.parse_args()

    assert (args.num_cpu >
            0), "Error: number of cpu must be positive and non zero"
    assert (args.max_distance >
            0), "Error: max distance must be positive and non zero"
    assert (args.num_episode >
            0), "Error: number of episodes must be positive and non zero"
    assert not args.reward_dist or not args.shape_reward, \
        "Error: cannot display the reward distribution for continuous reward"
    assert not(registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
        "Error: cannot have more than 1 CPU for the environment {}".format(args.env)
    if args.num_cpu > args.num_episode:
        args.num_cpu = args.num_episode
        printYellow(
            "num_cpu cannot be greater than num_episode, defaulting to {} cpus."
            .format(args.num_cpu))

    assert sum([args.simple_continual, args.circular_continual, args.square_continual]) <= 1, \
        "For continual SRL and RL, please provide only one scenario at the time !"

    assert not (args.log_custom_policy == '' and args.run_policy in ['walker', 'custom']), \
        "If using a custom policy, please specify a valid log folder for loading it."

    assert not (args.log_generative_model == '' and args.replay_generative_model == 'custom'), \
        "If using a custom policy, please specify a valid log folder for loading it."

    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # this is done so seed 0 and 1 are different and not simply offsets of the same dataset
    args.seed = np.random.RandomState(args.seed).randint(int(1e10))

    # File exists, need to deal with it
    if not args.no_record_data and os.path.exists(args.save_path + args.name):
        assert args.force, "Error: save directory '{}' already exists".format(
            args.save_path + args.name)

        shutil.rmtree(args.save_path + args.name)
        for part in glob.glob(args.save_path + args.name + "_part-[0-9]*"):
            shutil.rmtree(part)
    if not args.no_record_data:
        # create the output
        os.mkdir(args.save_path + args.name)

    if args.num_cpu == 1:
        env_thread(args, 0, partition=False)
    else:
        # try and divide into multiple processes, with an environment each
        try:
            jobs = []
            for i in range(args.num_cpu):
                process = multiprocessing.Process(target=env_thread,
                                                  args=(args, i, True))
                jobs.append(process)

            for j in jobs:
                j.start()

            try:
                for j in jobs:
                    j.join()
            except Exception as e:
                printRed("Error: unable to join thread")
                raise e

        except Exception as e:
            printRed("Error: unable to start thread")
            raise e

    if not args.no_record_data and args.num_cpu > 1:
        # sleep 1 second to avoid concurrency issues from multiprocessing (e.g., files still being written)
        time.sleep(1)
        # get all the parts
        file_parts = sorted(glob.glob(args.save_path + args.name +
                                      "_part-[0-9]*"),
                            key=lambda a: int(a.split("-")[-1]))

        # move the config files from the first part, as they are identical across parts
        os.rename(file_parts[0] + "/dataset_config.json",
                  args.save_path + args.name + "/dataset_config.json")
        os.rename(file_parts[0] + "/env_globals.json",
                  args.save_path + args.name + "/env_globals.json")

        ground_truth = None
        preprocessed_data = None

        # used to convert the part record_id to the fused record_id
        record_id = 0
        for part in file_parts:
            # sort the record names alphabetically, then numerically
            records = sorted(glob.glob(part + "/record_[0-9]*"),
                             key=lambda a: int(a.split("_")[-1]))

            record_id_start = record_id
            for record in records:
                os.renames(
                    record, args.save_path + args.name +
                    "/record_{:03d}".format(record_id))
                record_id += 1

            # fuse the npz files together, in the right order
            if ground_truth is None:
                # init
                ground_truth = {}
                preprocessed_data = {}
                ground_truth_load = np.load(part + "/ground_truth.npz")
                preprocessed_data_load = np.load(part +
                                                 "/preprocessed_data.npz")

                for arr in ground_truth_load.files:
                    if arr == "images_path":
                        ground_truth[arr] = np.array([
                            convertImagePath(args, path, record_id_start)
                            for path in ground_truth_load[arr]
                        ])
                    else:
                        ground_truth[arr] = ground_truth_load[arr]
                for arr in preprocessed_data_load.files:
                    preprocessed_data[arr] = preprocessed_data_load[arr]

            else:
                ground_truth_load = np.load(part + "/ground_truth.npz")
                preprocessed_data_load = np.load(part +
                                                 "/preprocessed_data.npz")

                for arr in ground_truth_load.files:
                    if arr == "images_path":
                        sanitised_paths = np.array([
                            convertImagePath(args, path, record_id_start)
                            for path in ground_truth_load[arr]
                        ])
                        ground_truth[arr] = np.concatenate(
                            (ground_truth[arr], sanitised_paths))
                    else:
                        ground_truth[arr] = np.concatenate(
                            (ground_truth[arr], ground_truth_load[arr]))
                for arr in preprocessed_data_load.files:
                    preprocessed_data[arr] = np.concatenate(
                        (preprocessed_data[arr], preprocessed_data_load[arr]))

            # remove the current part folder
            shutil.rmtree(part)

        # save the fused outputs
        np.savez(args.save_path + args.name + "/ground_truth.npz",
                 **ground_truth)
        np.savez(args.save_path + args.name + "/preprocessed_data.npz",
                 **preprocessed_data)

    if args.reward_dist:
        rewards, counts = np.unique(
            np.load(args.save_path + args.name +
                    "/preprocessed_data.npz")['rewards'],
            return_counts=True)
        counts = [
            "{:.2f}%".format(val * 100) for val in counts / np.sum(counts)
        ]
        print("reward distribution:")
        [
            print(" ", reward, count)
            for reward, count in list(zip(rewards, counts))
        ]
Example #13
def env_thread(args, thread_num, partition=True):
    """
    Run a session of an environment
    :param args: (ArgumentParser object)
    :param thread_num: (int) The thread ID of the environment session
    :param partition: (bool) If the output should be in multiple parts (default=True)
    """
    env_kwargs = {
        "max_distance": args.max_distance,
        "random_target": args.random_target,
        "force_down": True,
        "is_discrete": not args.continuous_actions,
        "renders": thread_num == 0 and args.display,
        "record_data": not args.no_record_data,
        "multi_view": args.multi_view,
        "save_path": args.save_path,
        "shape_reward": args.shape_reward,
        "simple_continual_target": args.simple_continual,
        "circular_continual_move": args.circular_continual,
        "square_continual_move": args.square_continual,
        "short_episodes": args.short_episodes
    }

    if partition:
        env_kwargs["name"] = args.name + "_part-" + str(thread_num)
    else:
        env_kwargs["name"] = args.name

    load_path, train_args, algo_name, algo_class = None, None, None, None
    model = None
    srl_model = None
    srl_state_dim = 0
    generated_obs = None
    env_norm = None

    if args.run_policy in ["walker", "custom"]:
        if args.latest:
            args.log_dir = latestPath(args.log_custom_policy)
        else:
            args.log_dir = args.log_custom_policy
        args.render = args.display
        args.plotting, args.action_proba = False, False

        train_args, load_path, algo_name, algo_class, _, env_kwargs_extra = loadConfigAndSetup(
            args)
        env_kwargs["srl_model"] = env_kwargs_extra["srl_model"]
        env_kwargs["random_target"] = env_kwargs_extra.get(
            "random_target", False)
        env_kwargs["use_srl"] = env_kwargs_extra.get("use_srl", False)

        # TODO REFACTOR
        env_kwargs["simple_continual_target"] = env_kwargs_extra.get(
            "simple_continual_target", False)
        env_kwargs["circular_continual_move"] = env_kwargs_extra.get(
            "circular_continual_move", False)
        env_kwargs["square_continual_move"] = env_kwargs_extra.get(
            "square_continual_move", False)
        env_kwargs["eight_continual_move"] = env_kwargs_extra.get(
            "eight_continual_move", False)

        eps = 0.2
        env_kwargs["state_init_override"] = np.array([MIN_X + eps, MAX_X - eps]) \
            if args.run_policy == 'walker' else None
        if env_kwargs["use_srl"]:
            env_kwargs["srl_model_path"] = env_kwargs_extra.get(
                "srl_model_path", None)
            env_kwargs["state_dim"] = getSRLDim(
                env_kwargs_extra.get("srl_model_path", None))
            srl_model = MultiprocessSRLModel(num_cpu=args.num_cpu,
                                             env_id=args.env,
                                             env_kwargs=env_kwargs)
            env_kwargs["srl_pipe"] = srl_model.pipe

    env_class = registered_env[args.env][0]
    env = env_class(**env_kwargs)

    if env_kwargs.get('srl_model', None) not in ["raw_pixels", None]:
        # TODO: Remove env duplication
        # This is a dirty trick to normalize the obs.
        # Because we override the SRL environment functions (step, reset) for on-policy generation & generative
        # replay, using stable-baselines' normalisation wrappers (step & reset) directly would break.
        env_norm = [
            makeEnv(args.env,
                    args.seed,
                    i,
                    args.log_dir,
                    allow_early_resets=False,
                    env_kwargs=env_kwargs) for i in range(args.num_cpu)
        ]
        env_norm = DummyVecEnv(env_norm)
        env_norm = VecNormalize(env_norm, norm_obs=True, norm_reward=False)
        env_norm = loadRunningAverage(
            env_norm, load_path_normalise=args.log_custom_policy)
    using_real_omnibot = args.env == "OmnirobotEnv-v0" and USING_OMNIROBOT

    walker_path = None
    action_walker = None
    state_init_for_walker = None
    kwargs_reset, kwargs_step = {}, {}

    if args.run_policy in ['custom', 'ppo2', 'walker']:
        # Additional env when using a trained agent to generate data
        train_env = vecEnv(env_kwargs, env_class)

        if args.run_policy == 'ppo2':
            model = PPO2(CnnPolicy, train_env).learn(args.ppo2_timesteps)
        else:
            _, _, algo_args = createEnv(args, train_args, algo_name,
                                        algo_class, env_kwargs)
            tf.reset_default_graph()
            set_global_seeds(args.seed % 2 ** 32)  # keep the seed within the 32-bit range
            printYellow("Compiling Policy function....")
            model = algo_class.load(load_path, args=algo_args)
            if args.run_policy == 'walker':
                walker_path = walkerPath()

    if len(args.replay_generative_model) > 0:
        srl_model = loadSRLModel(args.log_generative_model,
                                 th.cuda.is_available())
        srl_state_dim = srl_model.state_dim
        srl_model = srl_model.model.model

    frames = 0
    start_time = time.time()

    # divide evenly, then do an extra one for only some of them in order to get the right count
    for i_episode in range(args.num_episode // args.num_cpu + 1 *
                           (args.num_episode % args.num_cpu > thread_num)):

        # seed + position in this slice + size of slice (with remainder if uneven partitions)
        seed = args.seed + i_episode + args.num_episode // args.num_cpu * thread_num + \
               (thread_num if thread_num <= args.num_episode % args.num_cpu else args.num_episode % args.num_cpu)
        seed = seed % 2 ** 32  # keep the seed within the 32-bit range
        if not (args.run_policy in ['custom', 'walker']):
            env.seed(seed)
            env.action_space.seed(
                seed)  # this is for the sample() function from gym.space

        if len(args.replay_generative_model) > 0:

            sample = Variable(th.randn(1, srl_state_dim))
            if th.cuda.is_available():
                sample = sample.cuda()

            generated_obs = srl_model.decode(sample)
            generated_obs = generated_obs[0].detach().cpu().numpy()
            generated_obs = deNormalize(generated_obs)

            kwargs_reset['generated_observation'] = generated_obs
        obs = env.reset(**kwargs_reset)
        done = False
        action_proba = None
        t = 0
        episode_toward_target_on = False

        while not done:

            env.render()

            # Policy to run on the fly - to be trained before generation
            if args.run_policy == 'ppo2':
                action, _ = model.predict([obs])

            # Custom pre-trained Policy (SRL or End-to-End)
            elif args.run_policy in ['custom', 'walker']:
                obs = env_norm._normalize_observation(obs)
                action = [model.getAction(obs, done)]
                action_proba = model.getActionProba(obs, done)
                if args.run_policy == 'walker':
                    action_walker = np.array(walker_path[t])
            # Random Policy
            else:
                # Using a target reaching policy (untrained, from camera) when collecting data from real OmniRobot
                if episode_toward_target_on and np.random.rand() < args.toward_target_timesteps_proportion and \
                        using_real_omnibot:
                    action = [env.actionPolicyTowardTarget()]
                else:
                    action = [env.action_space.sample()]

            # Generative replay +/- for on-policy action
            if len(args.replay_generative_model) > 0:

                if args.run_policy == 'custom':
                    obs = obs.reshape(1, srl_state_dim)
                    obs = th.from_numpy(obs.astype(np.float32)).cuda()
                    z = obs
                    generated_obs = srl_model.decode(z)
                else:
                    sample = Variable(th.randn(1, srl_state_dim))

                    if th.cuda.is_available():
                        sample = sample.cuda()

                    generated_obs = srl_model.decode(sample)
                generated_obs = generated_obs[0].detach().cpu().numpy()
                generated_obs = deNormalize(generated_obs)

            action_to_step = action[0]
            kwargs_step = {
                k: v
                for (k, v) in [("generated_observation",
                                generated_obs), ("action_proba", action_proba),
                               ("action_grid_walker", action_walker)]
                if v is not None
            }

            obs, _, done, _ = env.step(action_to_step, **kwargs_step)

            frames += 1
            t += 1
            if done:
                if np.random.rand(
                ) < args.toward_target_timesteps_proportion and using_real_omnibot:
                    episode_toward_target_on = True
                else:
                    episode_toward_target_on = False
                print("Episode finished after {} timesteps".format(t + 1))

        if thread_num == 0:
            print("{:.2f} FPS".format(frames * args.num_cpu /
                                      (time.time() - start_time)))
Example #14
def GatherExperiments(
    folders,
    algo,
    window=40,
    title="",
    min_num_x=-1,
    timesteps=False,
    output_file="",
):
    """
    Compute mean and standard error for several experiments and plot the learning curve
    :param folders: ([str]) Log folders, where the monitor.csv are stored
    :param window: (int) Smoothing window
    :param algo: (str) name of the RL algo
    :param title: (str) plot title
    :param min_num_x: (int) Minimum number of episode/timesteps to keep an experiment (default: -1, no minimum)
    :param timesteps: (bool) Plot timesteps instead of episodes
    :param output_file: (str) Path to a file where the plot data will be saved
    :return: (np.ndarray, np.ndarray) the x values (episodes or timesteps) and the smoothed y values per experiment
    """
    y_list = []
    x_list = []
    ok = False
    for folder in folders:
        if timesteps:
            x, y = loadData(folder, smooth=1, bin_size=100)
            if x is not None:
                x, y = np.array(x), np.array(y)
        else:
            x, y = loadEpisodesData(folder)

        if x is None or (min_num_x > 0 and y.shape[0] < min_num_x):
            printRed("Skipping {}".format(folder))
            continue

        if y.shape[0] <= window:
            printYellow("Folder {}".format(folder))
            printRed("Not enough episodes for current window size = {}".format(
                window))
            continue
        ok = True
        y = movingAverage(y, window)
        y_list.append(y)
        print(len(x))
        # Truncate x
        x = x[len(x) - len(y):]
        x_list.append(x)

    if not ok:
        printRed("Not enough data to plot anything with current config." +
                 " Consider decreasing --min-x")
        return

    lengths = list(map(len, x_list))
    min_x, max_x = np.min(lengths), np.max(lengths)

    print("Min x: {}".format(min_x))
    print("Max x: {}".format(max_x))

    for i in range(len(x_list)):
        x_list[i] = x_list[i][:min_x]
        y_list[i] = y_list[i][:min_x]

    x = np.array(x_list)[0]
    y = np.array(y_list)
    # if output_file != "":
    #     printGreen("Saving aggregated data to {}.npz".format(output_file))
    #     np.savez(output_file, x=x, y=y)
    return x, y
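A hedged plotting sketch on top of the arrays returned above; the folder list is hypothetical and matplotlib is only one possible consumer:

import numpy as np
import matplotlib.pyplot as plt

folders = ["logs/OmnirobotEnv-v0/ground_truth/ppo2/run_1/",
           "logs/OmnirobotEnv-v0/ground_truth/ppo2/run_2/"]
x, y = GatherExperiments(folders, algo="ppo2", window=40, timesteps=True)

mean_reward = y.mean(axis=0)                     # y holds one smoothed curve per experiment
std_error = y.std(axis=0) / np.sqrt(y.shape[0])
plt.plot(x, mean_reward)
plt.fill_between(x, mean_reward - std_error, mean_reward + std_error, alpha=0.3)
plt.xlabel("Timesteps")
plt.ylabel("Smoothed reward")
plt.show()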
Example #15
def loadSRLModel(path=None,
                 cuda=False,
                 state_dim=None,
                 env_object=None,
                 img_shape=None):
    """
    Load a trained SRL model; it will try to guess the model type from the path.
    :param path: (str) Path to a srl model
    :param cuda: (bool)
    :param state_dim: (int)
    :param env_object: (gym env object)
    :param img_shape: (tuple) Image shape of the environment, e.g. (3, 64, 64)
    :return: (srl model)
    """

    model_type, losses, n_actions, model = None, None, None, None

    if path is not None:
        # Get path to the log folder
        log_folder = '/'.join(path.split('/')[:-1]) + '/'
        with open(log_folder + 'exp_config.json', 'r') as f:
            # IMPORTANT: keep the order for the losses
            # so the json is loaded as an OrderedDict
            exp_config = json.load(f, object_pairs_hook=OrderedDict)

        state_dim = exp_config.get('state-dim', None)
        losses = exp_config.get(
            'losses', None)  # None in the case of baseline models (pca)
        n_actions = exp_config.get(
            'n_actions', None)  # None in the case of baseline models (pca)
        model_type = exp_config.get('model-type', None)
        use_multi_view = exp_config.get('multi-view', False)
        inverse_model_type = exp_config.get('inverse-model-type', 'linear')
        num_dataset_episodes = exp_config.get('num_dataset_episodes', 100)
        assert state_dim is not None, \
            "Please make sure you are loading an up to date model with a conform exp_config file."

        split_dimensions = exp_config.get('split-dimensions')
        if isinstance(split_dimensions, OrderedDict):
            n_dims = sum(split_dimensions.values())
            # Combine losses instead of splitting
            if n_dims == 0:
                split_dimensions = None
    else:
        assert env_object is not None or state_dim > 0, \
            "When learning states, state_dim must be > 0. Otherwise, set SRL_MODEL_PATH \
            to a srl_model.pth file with learned states."

    if path is not None:
        if 'baselines' in path:
            if 'pca' in path:
                model_type = 'pca'
                model = SRLPCA(state_dim)

    assert model_type is not None or model is not None, \
        "Model type not supported. In order to use loadSRLModel, a path to an SRL model must be given."
    assert not (losses is None and not model_type == 'pca'), \
        "Please make sure you are loading an up to date model with a conform exp_config file."
    assert not (n_actions is None and not (model_type == 'pca')), \
        "Please make sure you are loading an up to date model with a conform exp_config file."
    if model is None:
        if use_multi_view:
            new_img_shape = (6, ) + img_shape[1:]
        else:
            new_img_shape = img_shape
        model = SRLNeuralNetwork(state_dim,
                                 cuda,
                                 img_shape=new_img_shape,
                                 model_type=model_type,
                                 n_actions=n_actions,
                                 losses=losses,
                                 split_dimensions=split_dimensions,
                                 spcls_num_classes=num_dataset_episodes,
                                 inverse_model_type=inverse_model_type)

    model_name = model_type
    if 'baselines' not in path:
        model_name += " with " + ", ".join(losses)
    printGreen("\nSRL: Using {} \n".format(model_name))

    if path is not None:
        printYellow("Loading trained model...{}".format(path))
        model.load(path)
    return model
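A loading sketch following how env_thread() above uses this helper; the checkpoint path is hypothetical and must point at a folder containing exp_config.json:

import torch as th

srl_model = loadSRLModel("srl_zoo/logs/kuka_button/example_run/srl_model.pth",
                         cuda=th.cuda.is_available(),
                         img_shape=(3, 64, 64))
print(srl_model.state_dim)       # dimensionality of the learned state space
raw_net = srl_model.model.model  # underlying torch model, as accessed in env_thread() above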
Example #16
def main():
    # Global variables for callback
    parser = argparse.ArgumentParser(
        description=
        "Evaluation script for distillation from two teacher policies")
    parser.add_argument('--seed',
                        type=int,
                        default=0,
                        help='random seed (default: 0)')
    parser.add_argument('--env',
                        type=str,
                        help='environment ID',
                        default='OmnirobotEnv-v0',
                        choices=list(registered_env.keys()))
    parser.add_argument(
        '--episode_window',
        type=int,
        default=40,
        help='Episode window for moving average plot (default: 40)')
    parser.add_argument(
        '--log-dir-teacher-one',
        default='/tmp/gym/',
        type=str,
        help=
        'directory to load an optimal agent for task 1 (default: /tmp/gym)')
    parser.add_argument(
        '--log-dir-teacher-two',
        default='/tmp/gym/',
        type=str,
        help=
        'directory to load an optimal agent for task 2 (default: /tmp/gym)')
    parser.add_argument(
        '--log-dir-student',
        default='/tmp/gym/',
        type=str,
        help=
        'directory to save the student agent logs and model (default: /tmp/gym)'
    )
    parser.add_argument(
        '--srl-config-file-one',
        type=str,
        default="config/srl_models_one.yaml",
        help='Set the location of the SRL model path configuration.')
    parser.add_argument(
        '--srl-config-file-two',
        type=str,
        default="config/srl_models_two.yaml",
        help='Set the location of the SRL model path configuration.')
    parser.add_argument(
        '--epochs-distillation',
        type=int,
        default=30,
        metavar='N',
        help='number of epochs to train for distillation (default: 30)')
    parser.add_argument(
        '--distillation-training-set-size',
        type=int,
        default=-1,
        help='Limit size (number of samples) of the training set (default: -1)'
    )
    parser.add_argument(
        '--eval-tasks',
        type=str,
        nargs='+',
        default=['cc', 'sqc', 'sc'],
        help='A cross evaluation from the latest stored model to all tasks')
    parser.add_argument(
        '--continual-learning-labels',
        type=str,
        nargs=2,
        metavar=('label_1', 'label_2'),
        default=argparse.SUPPRESS,
        help='Labels for the continual learning RL distillation task.')
    parser.add_argument('--student-srl-model',
                        type=str,
                        default='raw_pixels',
                        choices=list(registered_srl.keys()),
                        help='SRL model to use for the student RL policy')
    parser.add_argument(
        '--epochs-teacher-datasets',
        type=int,
        default=30,
        metavar='N',
        help=
        'number of epochs for generating both RL teacher datasets (default: 30)'
    )
    parser.add_argument(
        '--num-iteration',
        type=int,
        default=1,
        help='number of time each algorithm should be run the eval (N seeds).')
    parser.add_argument(
        '--eval-episode-window',
        type=int,
        default=400,
        metavar='N',
        help=
        'Episode window for saving each policy checkpoint for future distillation (default: 400)'
    )

    args, unknown = parser.parse_known_args()

    if 'continual_learning_labels' in args:
        assert args.continual_learning_labels[0] in CONTINUAL_LEARNING_LABELS and args.continual_learning_labels[1] \
               in CONTINUAL_LEARNING_LABELS, "Please specify a valid continual learning label for each dataset to " \
                                             "be used for RL distillation!"
    print(args.continual_learning_labels)
    assert os.path.exists(args.srl_config_file_one), \
        "Error: cannot load \"--srl-config-file {}\", file not found!".format(args.srl_config_file_one)

    assert os.path.exists(args.srl_config_file_two), \
        "Error: cannot load \"--srl-config-file {}\", file not found!".format(args.srl_config_file_two)
    if not (args.log_dir_teacher_one == "None"):
        assert os.path.exists(args.log_dir_teacher_one), \
            "Error: cannot load \"--log-dir-teacher-one {}\", path not found!".format(args.log_dir_teacher_one)
    assert os.path.exists(args.log_dir_teacher_two), \
        "Error: cannot load \"--log-dir-teacher-two {}\", path not found!".format(args.log_dir_teacher_two)

    teacher_pro = args.log_dir_teacher_one
    teacher_learn = args.log_dir_teacher_two

    # Output dataset paths generated from the continual learning labels
    teacher_pro_data = args.continual_learning_labels[0] + '/'
    teacher_learn_data = args.continual_learning_labels[1] + '/'
    merge_path = "data/on_policy_merged"

    print(teacher_pro_data, teacher_learn_data)
    episodes, policy_path = allPolicy(teacher_learn)

    rewards_at_episode = {}
    episodes_to_test = [
        e for e in episodes if (int(e) < 2000 and int(e) % 200 == 0) or (
            int(e) > 2000 and int(e) % 1000 == 0)
    ]

    # Generate data from the professional (optimal) teacher
    printYellow("\nGenerating on-policy data for the optimal teacher: " +
                args.continual_learning_labels[0])

    if not (args.log_dir_teacher_one == "None"):
        OnPolicyDatasetGenerator(teacher_pro,
                                 args.continual_learning_labels[0] + '_copy/',
                                 task_id=args.continual_learning_labels[0],
                                 num_eps=args.epochs_teacher_datasets,
                                 episode=-1,
                                 env_name=args.env)
    print("Eval on eps list: ", episodes_to_test)
    for eps in episodes_to_test:
        student_path = args.log_dir_student
        printBlue("\n\nEvaluation at episode " + str(eps))

        if not (args.log_dir_teacher_one == "None"):
            # Use a copy of the optimal teacher
            ok = subprocess.call([
                'cp', '-r',
                'data/' + args.continual_learning_labels[0] + '_copy/',
                'data/' + teacher_pro_data, '-f'
            ])
            assert ok == 0
            time.sleep(2)

        # Generate data from the learning teacher
        printYellow("\nGenerating on-policy data from the learning teacher: " +
                    args.continual_learning_labels[1])
        OnPolicyDatasetGenerator(teacher_learn,
                                 teacher_learn_data,
                                 task_id=args.continual_learning_labels[1],
                                 episode=eps,
                                 num_eps=args.epochs_teacher_datasets,
                                 env_name=args.env)

        if args.log_dir_teacher_one == "None":
            merge_path = 'data/' + teacher_learn_data
            ok = subprocess.call(
                ['cp', '-r', merge_path, 'srl_zoo/data/', '-f'])
        else:
            # merge the data
            mergeData('data/' + teacher_pro_data,
                      'data/' + teacher_learn_data,
                      merge_path,
                      force=True)

            ok = subprocess.call(
                ['cp', '-r', 'data/on_policy_merged/', 'srl_zoo/data/', '-f'])
        assert ok == 0
        time.sleep(2)

        # Train a policy with distillation on the merged teacher's datasets
        trainStudent('srl_zoo/' + merge_path,
                     args.continual_learning_labels[1],
                     yaml_file=args.srl_config_file_one,
                     log_dir=args.log_dir_student,
                     srl_model=args.student_srl_model,
                     env_name=args.env,
                     training_size=args.distillation_training_set_size,
                     epochs=args.epochs_distillation)
        student_path += args.env + '/' + args.student_srl_model + "/distillation/"
        latest_student_path = max([
            student_path + "/" + d for d in os.listdir(student_path)
            if os.path.isdir(student_path + "/" + d)
        ],
                                  key=os.path.getmtime) + '/'
        rewards = {}
        printRed("\nSaving the student at path: " + latest_student_path)
        for task_label in ["-sc", "-cc"]:
            rewards[task_label] = []

            for seed_i in range(args.num_iteration):
                printYellow("\nEvaluating student on task: " + task_label +
                            " for seed: " + str(seed_i))
                command_line_enjoy_student = [
                    'python', '-m', 'replay.enjoy_baselines',
                    '--num-timesteps', '251', '--log-dir', latest_student_path,
                    task_label, "--seed",
                    str(seed_i)
                ]
                ok = subprocess.check_output(command_line_enjoy_student)
                ok = ok.decode('utf-8')
                str_before = "Mean reward: "
                str_after = "\npybullet"
                idx_before = ok.find(str_before) + len(str_before)
                idx_after = ok.find(str_after)
                seed_reward = float(ok[idx_before:idx_after])
                rewards[task_label].append(seed_reward)
        print("rewards at eps ", eps, ": ", rewards)
        rewards_at_episode[eps] = rewards
    print("All rewards: ", rewards_at_episode)
    json_dict = json.dumps(rewards_at_episode)
    json_dict_name = \
        args.log_dir_student + "/reward_at_episode_" + datetime.datetime.now().strftime("%y-%m-%d_%Hh%M_%S") + '.json'
    with open(json_dict_name, "w") as f:
        f.write(json_dict)
    printRed("\nSaving the evaluation at path: " + json_dict_name)
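
# Hedged sketch (not part of the original example): the script above writes a JSON file
# mapping each checkpoint episode to {task_label: [reward per seed]}. Assuming that layout,
# this standalone helper reloads such a file and prints the mean reward per task, which can
# be useful before plotting. The function name is illustrative and not from the repository.
import json

import numpy as np


def summarize_distillation_rewards(json_path):
    """
    :param json_path: (str) path to a reward_at_episode_*.json file written above
    :return: (dict) {episode: {task_label: mean reward across seeds}}
    """
    with open(json_path, 'r') as f:
        rewards_at_episode = json.load(f)

    summary = {}
    # JSON keys are strings, so sort the checkpoint episodes numerically
    for eps in sorted(rewards_at_episode, key=int):
        summary[eps] = {task: float(np.mean(seed_rewards))
                        for task, seed_rewards in rewards_at_episode[eps].items()}
        print("episode {}: {}".format(eps, summary[eps]))
    return summary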
Example #17
0
def main():
    parser = argparse.ArgumentParser(description="OpenAI RL Baselines Benchmark",
                                     epilog='After the arguments are parsed, the rest are assumed to be arguments for' +
                                            ' rl_baselines.train')
    parser.add_argument('--algo', type=str, default='ppo2', help='OpenAI baseline to use',
                        choices=list(registered_rl.keys()))
    parser.add_argument('--env', type=str, nargs='+', default=["OmnirobotEnv-v0"], help='environment ID(s)',
                        choices=["OmnirobotEnv-v0"])#list(registered_env.keys()))
    parser.add_argument('--srl-model', type=str, nargs='+', default=["ground_truth"],
                        help='SRL model(s) to use',
                        choices=list(registered_srl.keys()))
    parser.add_argument('--num-timesteps', type=int, default=int(1e6), help='number of timesteps the baseline should run')
    parser.add_argument('-v', '--verbose', action='store_true', default=False, help='Display baseline STDOUT')
    parser.add_argument('--num-iteration', type=int, default=15,
                        help='number of time each algorithm should be run for each unique combination of environment ' +
                             ' and srl-model.')
    parser.add_argument('--seed', type=int, default=0,
                        help='initial seed for each unique combination of environment and srl-model.')
    parser.add_argument('--srl-config-file', nargs='+', type=str, default=["config/srl_models.yaml"],
                        help='Set the location of the SRL model path configuration.')
    
    parser.add_argument('--tasks', type=str, nargs='+', default=["cc"],
                        help='The tasks for the robot',
                        choices=["cc","ec","sqc","sc"])
#    parser.add_argument('--srl-modell', type=str, default="",help='')
    # returns the parsed arguments, and the rest are assumed to be arguments for rl_baselines.train
    args, train_args = parser.parse_known_args()

    # Sanity check
    assert args.num_timesteps >= 1, "Error: --num-timesteps cannot be less than 1"
    assert args.num_iteration >= 1, "Error: --num-iteration cannot be less than 1"

    # Remove duplicates and sort
    srl_models = list(set(args.srl_model))
    envs = list(set(args.env))
    tasks = args.tasks
    srl_models.sort()
    envs.sort()
    tasks = ['-' + t for t in tasks]
    config_files = args.srl_config_file

    # Load SRL models list
    if len(config_files) == 1:
        printYellow("You are using the same config file: {} for all training tasks".format(config_files[0]))
        for i in range(len(tasks) - 1):
            config_files.append(config_files[0])
    else:
        assert len(config_files) == len(tasks), \
            "Error: {} config files given for {} tasks".format(len(config_files), len(tasks))

    for file in config_files:
        assert os.path.exists(file), \
            "Error: cannot load \"--srl-config-file {}\", file not found!".format(file)

    for file in config_files:
        with open(file, 'rb') as f:
            all_models = yaml.load(f)
        # Checking definition and presence of all requested srl_models
        valid = True
        for env in envs:
            # validated the env definition
            if env not in all_models:
                printRed("Error: 'srl_models.yaml' missing definition for environment {}".format(env))
                valid = False
                continue  # skip to the next env, this one is not valid

            # checking log_folder for current env
            missing_log = "log_folder" not in all_models[env]
            if missing_log:
                printRed("Error: '{}' missing definition for log_folder in environment {}".format(file, env))
                valid = False

            # validate each model for the current env definition
            for model in srl_models:
                if registered_srl[model][0] == SRLType.ENVIRONMENT:
                    continue  # not an srl model, skip to the next model
                elif model not in all_models[env]:
                    printRed("Error: '{}' missing srl_model {} for environment {}".format(file, model, env))
                    valid = False
                elif (not missing_log) and (not os.path.exists(all_models[env]["log_folder"] + all_models[env][model])):
                    # checking presence of srl_model path, if and only if log_folder exists
                    printRed("Error: srl_model {} for environment {} was defined in ".format(model, env) +
                             "'{}', however the file {} it was tagetting does not exist.".format(
                                 file, all_models[env]["log_folder"] + all_models[env][model]))
                    valid = False

        assert valid, "Errors occurred due to malformed {}, cannot continue.".format(file)



    # check that all the SRL_models can be run on all the environments
    valid = True
    for env in envs:
        for model in srl_models:
            if registered_srl[model][1] is not None:
                found = False
                for compatible_class in registered_srl[model][1]:
                    if issubclass(compatible_class, registered_env[env][0]):
                        found = True
                        break
                if not found:
                    valid = False
                    printRed("Error: srl_model {}, is not compatible with the {} environment.".format(model, env))
    assert valid, "Errors occured due to an incompatible combination of srl_model and environment, cannot continue."

    # the seeds used in training the baseline.
    seeds = list(np.arange(args.num_iteration) + args.seed)

    if args.verbose:
        # None here means stdout of terminal for subprocess.call
        stdout = None
    else:
        stdout = open(os.devnull, 'w')

    printGreen("\nRunning {} benchmarks {} times...".format(args.algo, args.num_iteration))
    print("\nSRL-Models:\t{}".format(srl_models))
    print("environments:\t{}".format(envs))
    print("verbose:\t{}".format(args.verbose))
    print("timesteps:\t{}".format(args.num_timesteps))

    num_tasks = len(tasks)
    print("number of tasks:\t{}".format(num_tasks))

    printGreen("The tasks that will be executed: {}".format(args.tasks))
    printGreen("with the following config files: {}".format(config_files))


    for model in srl_models:

        for env in envs:
            for iter_task in range(num_tasks):

                for i in range(args.num_iteration):
                    printGreen(
                        "\nIteration_num={} (seed: {}), Environment='{}', SRL-Model='{}', Task='{}', Config_file='{}'"
                        .format(i, seeds[i], env, model, tasks[iter_task], config_files[iter_task]))
    
                    # redefine the parsed args for rl_baselines.train
                    loop_args = ['--srl-model', model, '--seed', str(seeds[i]),
                                 '--algo', args.algo, '--env', env,
                                 '--num-timesteps', str(int(args.num_timesteps)), 
                                 '--srl-config-file', config_files[iter_task], tasks[iter_task]]
                    ok = subprocess.call(['python', '-m', 'rl_baselines.train'] + train_args + loop_args, stdout=stdout)
    
                    if ok != 0:
                        # throw the error down to the terminal
                        raise ChildProcessError("An error occurred, error code: {}".format(ok))
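
# Hedged sketch (not part of the original example): the validation loop above expects each
# --srl-config-file to map an environment ID to a "log_folder" entry plus one path per SRL
# model, and resolves model files as log_folder + model path. The environment name and the
# paths below are illustrative placeholders, not values taken from an actual config.
import yaml

EXAMPLE_SRL_CONFIG = """
OmnirobotEnv-v0:
  log_folder: srl_zoo/logs/example_dataset/
  autoencoder: baselines/autoencoder/srl_model.pth
  srl_combination: baselines/combination/srl_model.pth
"""

example_models = yaml.safe_load(EXAMPLE_SRL_CONFIG)
# Mirrors the checks performed above: a log_folder entry must exist, and each requested
# srl_model path is concatenated onto it before checking the file on disk.
assert "log_folder" in example_models["OmnirobotEnv-v0"]
print(example_models["OmnirobotEnv-v0"]["log_folder"] + example_models["OmnirobotEnv-v0"]["autoencoder"])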
Example #18
0
File: train.py  Project: s206283/gcrl
def main():
    # Global variables for callback
    global ENV_NAME, ALGO, ALGO_NAME, LOG_INTERVAL, VISDOM_PORT, viz
    global SAVE_INTERVAL, EPISODE_WINDOW, MIN_EPISODES_BEFORE_SAVE
    parser = argparse.ArgumentParser(description="Train script for RL algorithms")
    parser.add_argument('--algo', default='ppo2', choices=list(registered_rl.keys()), help='RL algo to use',
                        type=str)
    parser.add_argument('--env', type=str, help='environment ID', default='KukaButtonGymEnv-v0',
                        choices=list(registered_env.keys()))
    parser.add_argument('--seed', type=int, default=0, help='random seed (default: 0)')
    parser.add_argument('--episode_window', type=int, default=40,
                        help='Episode window for moving average plot (default: 40)')
    parser.add_argument('--log-dir', default='/tmp/gym/', type=str,
                        help='directory to save agent logs and model (default: /tmp/gym)')
    parser.add_argument('--num-timesteps', type=int, default=int(1e6))
    parser.add_argument('--srl-model', type=str, default='raw_pixels', choices=list(registered_srl.keys()),
                        help='SRL model to use')
    parser.add_argument('--num-stack', type=int, default=1, help='number of frames to stack (default: 1)')
    parser.add_argument('--action-repeat', type=int, default=1,
                        help='number of times an action will be repeated (default: 1)')
    parser.add_argument('--port', type=int, default=8097, help='visdom server port (default: 8097)')
    parser.add_argument('--no-vis', action='store_true', default=False, help='disables visdom visualization')
    parser.add_argument('--shape-reward', action='store_true', default=False,
                        help='Shape the reward (reward = - distance) instead of a sparse reward')
    parser.add_argument('-c', '--continuous-actions', action='store_true', default=False)
    parser.add_argument('-joints', '--action-joints', action='store_true', default=False,
                        help='set actions to the joints of the arm directly, instead of inverse kinematics')
    parser.add_argument('-r', '--random-target', action='store_true', default=False,
                        help='Set the button to a random position')
    parser.add_argument('--srl-config-file', type=str, default="config/srl_models.yaml",
                        help='Set the location of the SRL model path configuration.')
    parser.add_argument('--hyperparam', type=str, nargs='+', default=[])
    parser.add_argument('--min-episodes-save', type=int, default=100,
                        help="Min number of episodes before saving best model")
    parser.add_argument('--latest', action='store_true', default=False,
                        help='load the latest learned model (location:srl_zoo/logs/DatasetName/)')
    parser.add_argument('--load-rl-model-path', type=str, default=None,
                        help="load the trained RL model, should be with the same algorithm type")
    
    # Ignore unknown args for now
    args, unknown = parser.parse_known_args()
    env_kwargs = {}

    # LOAD SRL models list
    assert os.path.exists(args.srl_config_file), \
        "Error: cannot load \"--srl-config-file {}\", file not found!".format(args.srl_config_file)
    with open(args.srl_config_file, 'rb') as f:
        all_models = yaml.load(f)

    # Sanity check
    assert args.episode_window >= 1, "Error: --episode_window cannot be less than 1"
    assert args.num_timesteps >= 1, "Error: --num-timesteps cannot be less than 1"
    assert args.num_stack >= 1, "Error: --num-stack cannot be less than 1"
    assert args.action_repeat >= 1, "Error: --action-repeat cannot be less than 1"
    assert 0 <= args.port < 65535, "Error: invalid visdom port number {}, ".format(args.port) + \
                                   "port number must be an unsigned 16bit number [0,65535]."
    assert registered_srl[args.srl_model][0] == SRLType.ENVIRONMENT or args.env in all_models, \
        "Error: the environment {} has no srl_model defined in 'srl_models.yaml'. Cannot continue.".format(args.env)
    # check that all the SRL_model can be run on the environment
    if registered_srl[args.srl_model][1] is not None:
        found = False
        for compatible_class in registered_srl[args.srl_model][1]:
            if issubclass(compatible_class, registered_env[args.env][0]):
                found = True
                break
        assert found, "Error: srl_model {}, is not compatible with the {} environment.".format(args.srl_model, args.env)

    ENV_NAME = args.env
    ALGO_NAME = args.algo
    VISDOM_PORT = args.port
    EPISODE_WINDOW = args.episode_window
    MIN_EPISODES_BEFORE_SAVE = args.min_episodes_save

    if args.no_vis:
        viz = False

    algo_class, algo_type, action_type = registered_rl[args.algo]
    algo = algo_class()
    ALGO = algo
    

    # if callback frequency needs to be changed
    LOG_INTERVAL = algo.LOG_INTERVAL
    SAVE_INTERVAL = algo.SAVE_INTERVAL

    if not args.continuous_actions and ActionType.DISCRETE not in action_type:
        raise ValueError(args.algo + " does not support discrete actions, please use the '--continuous-actions' " +
                         "(or '-c') flag.")
    if args.continuous_actions and ActionType.CONTINUOUS not in action_type:
        raise ValueError(args.algo + " does not support continuous actions, please remove the '--continuous-actions' " +
                         "(or '-c') flag.")

    env_kwargs["is_discrete"] = not args.continuous_actions

    printGreen("\nAgent = {} \n".format(args.algo))

    env_kwargs["action_repeat"] = args.action_repeat
    # Random init position for button
    env_kwargs["random_target"] = args.random_target
    # Allow up action
    # env_kwargs["force_down"] = False

    # allow multi-view
    env_kwargs['multi_view'] = args.srl_model == "multi_view_srl"
    parser = algo.customArguments(parser)
    args = parser.parse_args()

    args, env_kwargs = configureEnvAndLogFolder(args, env_kwargs, all_models)
    args_dict = filterJSONSerializableObjects(vars(args))
    # Save args
    with open(LOG_DIR + "args.json", "w") as f:
        json.dump(args_dict, f)

    env_class = registered_env[args.env][0]
    # env default kwargs
    default_env_kwargs = {k: v.default
                          for k, v in inspect.signature(env_class.__init__).parameters.items()
                          if v is not None}

    globals_env_param = sys.modules[env_class.__module__].getGlobals()

    super_class = registered_env[args.env][1]
    # Recursive search through all the super classes of the requested environment, in order to get all the arguments.
    rec_super_class_lookup = {dict_class: dict_super_class for _, (dict_class, dict_super_class, _, _) in
                              registered_env.items()}
    while super_class != SRLGymEnv:
        assert super_class in rec_super_class_lookup, "Error: could not find super class of {}".format(super_class) + \
                                                      ", are you sure \"registered_env\" is correctly defined?"
        super_env_kwargs = {k: v.default
                            for k, v in inspect.signature(super_class.__init__).parameters.items()
                            if v is not None}
        default_env_kwargs = {**super_env_kwargs, **default_env_kwargs}

        globals_env_param = {**sys.modules[super_class.__module__].getGlobals(), **globals_env_param}

        super_class = rec_super_class_lookup[super_class]

    # Print Variables
    printYellow("Arguments:")
    pprint(args_dict)
    printYellow("Env Globals:")
    pprint(filterJSONSerializableObjects({**globals_env_param, **default_env_kwargs, **env_kwargs}))
    # Save env params
    saveEnvParams(globals_env_param, {**default_env_kwargs, **env_kwargs})
    # Seed tensorflow, python and numpy random generator
    set_global_seeds(args.seed)
    # Augment the number of timesteps (when using multiprocessing this number is not reached)
    args.num_timesteps = int(1.1 * args.num_timesteps)
    # Get the hyperparameter, if given (Hyperband)
    hyperparams = {param.split(":")[0]: param.split(":")[1] for param in args.hyperparam}
    hyperparams = algo.parserHyperParam(hyperparams)
    
    if args.load_rl_model_path is not None:
        # Use a small learning rate when fine-tuning a loaded RL model
        print("use a small learning rate: {:f}".format(1.0e-4))
        hyperparams["learning_rate"] = lambda f: f * 1.0e-4
        
    # Train the agent

    if args.load_rl_model_path is not None:
        algo.setLoadPath(args.load_rl_model_path)
    algo.train(args, callback, env_kwargs=env_kwargs, train_kwargs=hyperparams)
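
# Hedged sketch (not part of the original example): --hyperparam entries are parsed above as
# "name:value" strings before algo.parserHyperParam() casts them to the right types. A minimal
# standalone illustration of that split, with made-up hyperparameter names:
example_hyperparam_args = ["gamma:0.9", "ent_coef:0.005"]
example_hyperparams = {param.split(":")[0]: param.split(":")[1] for param in example_hyperparam_args}
# At this point every value is still a string; only the algorithm-specific
# parserHyperParam() call converts them (e.g. to float).
print(example_hyperparams)  # {'gamma': '0.9', 'ent_coef': '0.005'}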
Example #19
0
def loadConfigAndSetup(load_args):
    """
    Get the training config and setup the parameters
    :param load_args: (Arguments)
    :return: (dict, str, str, str, dict)
    """
    algo_name = ""
    for algo in list(registered_rl.keys()):
        if algo in load_args.log_dir:
            algo_name = algo
            break
    algo_class, algo_type, _ = registered_rl[algo_name]
    if algo_type == AlgoType.OTHER:
        raise ValueError(algo_name + " is not supported for replay")
    printGreen("\n" + algo_name + "\n")

    try:  # If args contains episode information, this is for student_evaluation (distillation)
        if load_args.episode != -1:
            load_path = "{}/{}_{}_model.pkl".format(load_args.log_dir, algo_name, load_args.episode)
        else:
            load_path = "{}/{}_model.pkl".format(load_args.log_dir, algo_name)
    except AttributeError:
        printYellow(
            "No checkpoint episode specified, falling back to the default policy model: {}_model.pkl".format(algo_name))
        if load_args.log_dir[-3:] != 'pkl':
            load_path = "{}/{}_model.pkl".format(load_args.log_dir, algo_name)
        else:
            load_path = load_args.log_dir
            load_args.log_dir = os.path.dirname(load_path)+'/'

    env_globals = json.load(open(load_args.log_dir + "env_globals.json", 'r'))
    train_args = json.load(open(load_args.log_dir + "args.json", 'r'))

    env_kwargs = {
        "renders": load_args.render,
        "shape_reward": load_args.shape_reward,  # Reward sparse or shaped
        "action_joints": train_args["action_joints"],
        "is_discrete": not train_args["continuous_actions"],
        "random_target": train_args.get('random_target', False),
        "srl_model": train_args["srl_model"]
    }

    # load it, if it was defined
    if "action_repeat" in env_globals:
        env_kwargs["action_repeat"] = env_globals['action_repeat']

    # Remove up action
    if train_args["env"] == "Kuka2ButtonGymEnv-v0":
        env_kwargs["force_down"] = env_globals.get('force_down', True)
    else:
        env_kwargs["force_down"] = env_globals.get('force_down', False)

    if train_args["env"] == "OmnirobotEnv-v0":
        env_kwargs["simple_continual_target"] = env_globals.get("simple_continual_target", False)
        env_kwargs["circular_continual_move"] = env_globals.get("circular_continual_move", False)
        env_kwargs["square_continual_move"] = env_globals.get("square_continual_move", False)
        env_kwargs["eight_continual_move"] = env_globals.get("eight_continual_move", False)

        # If overriding the environment for specific Continual Learning tasks
        if sum([load_args.simple_continual, load_args.circular_continual, load_args.square_continual]) >= 1:
            env_kwargs["simple_continual_target"] = load_args.simple_continual
            env_kwargs["circular_continual_move"] = load_args.circular_continual
            env_kwargs["square_continual_move"] = load_args.square_continual
            env_kwargs["random_target"] = not (load_args.circular_continual or load_args.square_continual)

    srl_model_path = None
    if train_args["srl_model"] != "raw_pixels":
        train_args["policy"] = "mlp"
        path = env_globals.get('srl_model_path')

        if path is not None:
            env_kwargs["use_srl"] = True
            # Check that the srl saved model exists on the disk
            assert os.path.isfile(env_globals['srl_model_path']), \
                "{} does not exist".format(env_globals['srl_model_path'])
            srl_model_path = env_globals['srl_model_path']
            env_kwargs["srl_model_path"] = srl_model_path

    return train_args, load_path, algo_name, algo_class, srl_model_path, env_kwargs
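
# Hedged sketch (not part of the original example): loadConfigAndSetup() reads args.json,
# env_globals.json and a "<algo>_model.pkl" file from load_args.log_dir. This small helper
# checks that such a folder is complete before replay; the directory layout is taken from the
# function above, while the "ppo2" default algorithm name is an illustrative placeholder.
import os


def check_replay_log_dir(log_dir, algo_name="ppo2"):
    """
    :param log_dir: (str) candidate log folder for replay
    :param algo_name: (str) RL algorithm used during training
    :return: (bool) True if all the files read by loadConfigAndSetup() are present
    """
    expected_files = ["args.json", "env_globals.json", "{}_model.pkl".format(algo_name)]
    missing = [name for name in expected_files if not os.path.isfile(os.path.join(log_dir, name))]
    if missing:
        print("Missing files in {}: {}".format(log_dir, missing))
    return len(missing) == 0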
Example #20
0
def main():
    load_args = parseArguments()
    train_args, load_path, algo_name, algo_class, srl_model_path, env_kwargs = loadConfigAndSetup(load_args)
    log_dir, envs, algo_args = createEnv(load_args, train_args, algo_name, algo_class, env_kwargs)

    assert (not load_args.plotting and not load_args.action_proba)\
        or load_args.num_cpu == 1, "Error: cannot run plotting with more than 1 CPU"

    tf.reset_default_graph()
    set_global_seeds(load_args.seed)
    # createTensorflowSession()

    printYellow("Compiling Policy function....")
    printYellow(load_path)
    method = algo_class.load(load_path, args=algo_args)

    dones = [False for _ in range(load_args.num_cpu)]
    # HACK: check for custom vec env by checking if the last wrapper is WrapFrameStack
    # this is used for detecting algorithms that have a similar wrapping to deepq
    # is considered a hack because we are unable to detect if this wrapper was added earlier to the environment object
    using_custom_vec_env = isinstance(envs, WrapFrameStack)

    obs = envs.reset()
    if using_custom_vec_env:
        obs = obs.reshape((1,) + obs.shape)

    # plotting init
    if load_args.plotting:
        plt.pause(0.1)
        fig = plt.figure()
        old_obs = []
        if registered_env[train_args["env"]][2] == PlottingType.PLOT_3D:
            ax = fig.add_subplot(111, projection='3d')
            line, = ax.plot([], [], [], c=[1, 0, 0, 1], label="episode 0")
            point = ax.scatter([0], [0], [0], c=[1, 0, 0, 1])
            min_zone = [+np.inf, +np.inf, +np.inf]
            max_zone = [-np.inf, -np.inf, -np.inf]
            amplitude = [0, 0, 0]
            min_state_dim = 3
        else:
            ax = fig.add_subplot(111)
            line, = ax.plot([], [], c=[1, 0, 0, 1], label="episode 0")
            point = ax.scatter([0], [0], c=[1, 0, 0, 1])
            min_zone = [+np.inf, +np.inf]
            max_zone = [-np.inf, -np.inf]
            amplitude = [0, 0]
            min_state_dim = 2
        fig.legend()

        if train_args["srl_model"] in ["ground_truth", "supervised"]:
            delta_obs = [envs.get_original_obs()[0]]
        else:
            # we need to rebuild the PCA representation, in order to visualize correctly in 3D
            # load the saved representations
            path = "/".join(srl_model_path.split("/")[:-1]) + "/image_to_state.json"
            X = np.array(list(json.load(open(path, 'r')).values()))

            X = fixStateDim(X, min_state_dim=min_state_dim)

            # estimate the PCA
            if registered_env[train_args["env"]][2] == PlottingType.PLOT_3D:
                pca = PCA(n_components=3)
            else:
                pca = PCA(n_components=2)
            pca.fit(X)
            delta_obs = [pca.transform(fixStateDim([obs[0]], min_state_dim=min_state_dim))[0]]
        plt.pause(0.00001)

    # check if the algorithm has a defined getActionProba function before allowing action_proba plotting
    if load_args.action_proba:
        if not hasattr(method, "getActionProba"):
            printYellow("Warning: requested flag --action-proba, "
                        "but the algorihtm {} does not implement 'getActionProba'".format(algo_name))
        else:
            fig_prob = plt.figure()
            ax_prob = fig_prob.add_subplot(111)
            old_obs = []
            if train_args["continuous_actions"]:
                ax_prob.set_ylim(np.min(envs.action_space.low), np.max(envs.action_space.high))
                bar = ax_prob.bar(np.arange(np.prod(envs.action_space.shape)),
                                  np.array([0] * np.prod(envs.action_space.shape)),
                                  color=plt.get_cmap('viridis')(int(1 / np.prod(envs.action_space.shape) * 255)))
            else:
                ax_prob.set_ylim(0, 1)
                bar = ax_prob.bar(np.arange(envs.action_space.n), np.array([0] * envs.action_space.n),
                                  color=plt.get_cmap('viridis')(int(1 / envs.action_space.n * 255)))
            plt.pause(1)
            background_prob = fig_prob.canvas.copy_from_bbox(ax_prob.bbox)

    n_done = 0
    last_n_done = 0
    episode = 0
    for i in range(load_args.num_timesteps):
        actions = method.getAction(obs, dones)
        obs, rewards, dones, _ = envs.step(actions)
        if using_custom_vec_env:
            obs = obs.reshape((1,) + obs.shape)

        # plotting
        if load_args.plotting:
            if train_args["srl_model"] in ["ground_truth", "supervised"]:
                adjusted_obs = envs.get_original_obs()[0]
            else:
                adjusted_obs = pca.transform(fixStateDim([obs[0]], min_state_dim=min_state_dim))[0]

            # create a new line, if the episode is finished
            if np.sum(dones) > 0:
                old_obs.append(np.array(delta_obs))
                line.set_c(sns.color_palette()[episode % len(sns.color_palette())])
                episode += 1
                if registered_env[train_args["env"]][2] == PlottingType.PLOT_3D:
                    line, = ax.plot([], [], [], c=[1, 0, 0, 1], label="episode " + str(episode))
                else:
                    line, = ax.plot([], [], c=[1, 0, 0, 1], label="episode " + str(episode))
                fig.legend()
                delta_obs = [adjusted_obs]
            else:
                delta_obs.append(adjusted_obs)

            coor_plt = fixStateDim(np.array(delta_obs), min_state_dim=min_state_dim)[1:]
            unstack_val = coor_plt.shape[1] // train_args.get("num_stack", 1)
            coor_plt = coor_plt[:, -unstack_val:]

            # updating the 3d vertices for the line and the dot drawing, to avoid redrawing the entire image
            if registered_env[train_args["env"]][2] == PlottingType.PLOT_3D:
                line._verts3d = (coor_plt[:, 0], coor_plt[:, 1], coor_plt[:, 2])
                point._offsets3d = (coor_plt[-1:, 0], coor_plt[-1:, 1], coor_plt[-1:, 2])
                if coor_plt.shape[0] > 0:
                    min_zone = np.minimum(np.amin(coor_plt, axis=0), min_zone)
                    max_zone = np.maximum(np.amax(coor_plt, axis=0), max_zone)
                    amplitude = max_zone - min_zone + 1e-10
                ax.set_xlim(min_zone[0] - abs(amplitude[0] * 0.2), max_zone[0] + abs(amplitude[0] * 0.2))
                ax.set_ylim(min_zone[1] - abs(amplitude[1] * 0.2), max_zone[1] + abs(amplitude[1] * 0.2))
                ax.set_zlim(min_zone[2] - abs(amplitude[2] * 0.2), max_zone[2] + abs(amplitude[2] * 0.2))
            else:
                line.set_xdata(coor_plt[:, 0])
                line.set_ydata(coor_plt[:, 1])
                point._offsets = coor_plt[-1:, :]
                if coor_plt.shape[0] > 0:
                    min_zone = np.minimum(np.amin(coor_plt, axis=0), min_zone)
                    max_zone = np.maximum(np.amax(coor_plt, axis=0), max_zone)
                    amplitude = max_zone - min_zone + 1e-10
                ax.set_xlim(min_zone[0] - abs(amplitude[0] * 0.2), max_zone[0] + abs(amplitude[0] * 0.2))
                ax.set_ylim(min_zone[1] - abs(amplitude[1] * 0.2), max_zone[1] + abs(amplitude[1] * 0.2))

            # Draw every 5 frames to avoid UI freezing
            if i % 5 == 0:
                fig.canvas.draw()
                plt.pause(0.000001)

        if load_args.action_proba and hasattr(method, "getActionProba"):
            # When continuous actions are needed, we cannot plot the action probability of every action
            # in the action space, so we show the action directly instead
            if train_args["continuous_actions"]:
                pi = method.getAction(obs, dones)
            else:
                pi = method.getActionProba(obs, dones)

            fig_prob.canvas.restore_region(background_prob)
            for act, rect in enumerate(bar):
                if train_args["continuous_actions"]:
                    rect.set_height(pi[0][act])
                    color_val = np.abs(pi[0][act]) / max(np.max(envs.action_space.high),
                                                         np.max(np.abs(envs.action_space.low)))
                else:
                    rect.set_height(softmax(pi[0])[act])
                    color_val = softmax(pi[0])[act]
                rect.set_color(plt.get_cmap('viridis')(int(color_val * 255)))
                ax_prob.draw_artist(rect)
            fig_prob.canvas.blit(ax_prob.bbox)

        if using_custom_vec_env:
            if dones:
                obs = envs.reset()
                obs = obs.reshape((1,) + obs.shape)

        n_done += np.sum(dones)
        if (n_done - last_n_done) > 1:
            last_n_done = n_done
            _, mean_reward = computeMeanReward(log_dir, n_done)
            print("{} episodes - Mean reward: {:.2f}".format(n_done, mean_reward))
            print("episodes done: {}, log dir: {}".format(n_done, log_dir))
    _, mean_reward = computeMeanReward(log_dir, n_done)
    print("{} episodes - Mean reward: {:.2f}".format(n_done, mean_reward))
Example #21
0
def plotGatheredExperiments(folders,
                            algo,
                            y_limits,
                            window=40,
                            title="",
                            min_num_x=-1,
                            timesteps=False,
                            output_file="",
                            no_display=False):
    """
    Compute mean and standard error for several experiments and plot the learning curve
    :param folders: ([str]) Log folders, where the monitor.csv are stored
    :param window: (int) Smoothing window
    :param algo: (str) name of the RL algo
    :param title: (str) plot title
    :param min_num_x: (int) Minimum number of episode/timesteps to keep an experiment (default: -1, no minimum)
    :param timesteps: (bool) Plot timesteps instead of episodes
    :param y_limits: ([float]) y-limits for the plot
    :param output_file: (str) Path to a file where the plot data will be saved
    :param no_display: (bool) If set to True, the plot won't be displayed (useful when only saving the plot)
    """
    y_list = []
    x_list = []
    ok = False
    for folder in folders:
        if timesteps:
            x, y = loadData(folder, smooth=1, bin_size=100)
            if x is not None:
                x, y = np.array(x), np.array(y)
        else:
            x, y = loadEpisodesData(folder)

        if x is None or (min_num_x > 0 and y.shape[0] < min_num_x):
            printYellow("Skipping {}".format(folder))
            continue

        if y.shape[0] <= window:
            printYellow("Folder {}".format(folder))
            printYellow(
                "Not enough episodes for current window size = {}".format(
                    window))
            continue
        ok = True
        y = movingAverage(y, window)
        y_list.append(y)

        # Truncate x
        x = x[len(x) - len(y):]
        x_list.append(x)

    if not ok:
        printRed("Not enough data to plot anything with current config." +
                 " Consider decreasing --min-x")
        return

    lengths = list(map(len, x_list))
    min_x, max_x = np.min(lengths), np.max(lengths)

    print("Min x: {}".format(min_x))
    print("Max x: {}".format(max_x))

    for i in range(len(x_list)):
        x_list[i] = x_list[i][:min_x]
        y_list[i] = y_list[i][:min_x]

    x = np.array(x_list)[0]
    y = np.array(y_list)

    printGreen("{} Experiments".format(y.shape[0]))
    print("Min, Max rewards:", np.min(y), np.max(y))

    fig = plt.figure(title)
    # Compute mean for different seeds
    m = np.mean(y, axis=0)
    # Compute standard error
    s = np.squeeze(np.asarray(np.std(y, axis=0)))
    n = y.shape[0]
    plt.fill_between(x,
                     m - s / np.sqrt(n),
                     m + s / np.sqrt(n),
                     color=lightcolors[0])
    plt.plot(x, m, color=darkcolors[0], label=algo, linewidth=1)

    if timesteps:
        formatter = FuncFormatter(millions)
        plt.xlabel('Number of Timesteps')
        fig.axes[0].xaxis.set_major_formatter(formatter)
    else:
        plt.xlabel('Number of Episodes')
    plt.ylabel('Rewards')

    plt.title(title, **fontstyle)
    plt.ylim(y_limits)

    plt.legend(framealpha=0.5,
               labelspacing=0.01,
               loc='lower right',
               fontsize=16)

    if output_file != "":
        printGreen("Saving aggregated data to {}.npz".format(output_file))
        np.savez(output_file, x=x, y=y)

    if not no_display:
        plt.show()
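
# Hedged sketch (not part of the original example): plotGatheredExperiments() relies on a
# movingAverage() helper defined elsewhere in the project. A simple rolling mean with the
# shorter output length assumed by the x-truncation above could be implemented as:
import numpy as np


def moving_average_example(values, window):
    """
    :param values: (numpy array) reward per episode
    :param window: (int) smoothing window
    :return: (numpy array) smoothed values of length len(values) - window + 1
    """
    weights = np.ones(window) / window
    return np.convolve(values, weights, mode='valid')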
Example #22
0
def main():
    parser = argparse.ArgumentParser(
        description='Deterministic dataset generator for SRL training ' +
        '(can be used for environment testing)')
    parser.add_argument('--num-cpu',
                        type=int,
                        default=1,
                        help='number of cpus to run on')
    parser.add_argument('--num-episode',
                        type=int,
                        default=50,
                        help='number of episodes to run')
    parser.add_argument('--max_steps_per_epoch',
                        type=int,
                        default=200,
                        help='max num steps per epoch')

    # Custom args. Want to update eventually, i.e., specify a specific path for dr
    parser.add_argument(
        '--dr',
        action='store_true',
        default=False,
        help=
        "Include this flag to use the chosen environment with domain randomization"
    )
    parser.add_argument(
        '--alt',
        action='store_true',
        default=False,
        help=
        "Include this flag to use the chosen environment with alternate view")
    parser.add_argument(
        '--special_start',
        action='store_true',
        default=False,
        help=
        "Include this flag to use the chosen environment with the special start"
    )

    parser.add_argument(
        '--save-path',
        type=str,
        default='robotics-rl-srl/data/',
        help='Folder where the environments will save the output')
    parser.add_argument('--name',
                        type=str,
                        default='UNSETNAME',
                        help='Folder name for the output')
    parser.add_argument('--env',
                        type=str,
                        default='push_rotate',
                        help='The environment wanted',
                        choices=list(envs.keys()))

    parser.add_argument('--display', action='store_true', default=False)
    parser.add_argument('--no-record-data', action='store_true', default=False)

    parser.add_argument('--seed', type=int, default=0, help='the seed')
    parser.add_argument(
        '-f',
        '--force',
        action='store_true',
        default=False,
        help='Force the save, even if it overrides something else,' +
        ' including partial parts if they exist')

    # TODO: Change this argument to be for the different types of tasks
    parser.add_argument('--multi-view',
                        action='store_true',
                        default=False,
                        help='Set a second camera to the scene')

    parser.add_argument(
        '--reward-dist',
        action='store_true',
        default=False,
        help=
        'Prints out the reward distribution when the dataset generation is finished'
    )
    parser.add_argument('--run-ppo2',
                        action='store_true',
                        default=False,
                        help='runs a ppo2 agent instead of a random agent')
    parser.add_argument(
        '--ppo2-timesteps',
        type=int,
        default=1000,
        help='number of timesteps to run PPO2 on before generating the dataset'
    )

    args = parser.parse_args()

    assert args.num_cpu > 0, "Error: number of cpu must be positive and non-zero"
    assert args.num_episode > 0, "Error: number of episodes must be positive and non-zero"
    # assert not(registered_env[args.env][3] is ThreadingType.NONE and args.num_cpu != 1), \
    # "Error: cannot have more than 1 CPU for the environment {}".format(args.env)

    if args.num_cpu > args.num_episode:
        args.num_cpu = args.num_episode
        printYellow(
            "num_cpu cannot be greater than num_episode, defaulting to {} cpus."
            .format(args.num_cpu))

    # This is done so that seeds 0 and 1 are different, and not simply offsets of the same dataset.
    args.seed = np.random.RandomState(args.seed).randint(int(1e10))

    # File exists, need to deal with it
    if not args.no_record_data and os.path.exists(args.save_path + args.name):
        assert args.force, "Error: save directory '{}' already exists".format(
            args.save_path + args.name)

        shutil.rmtree(args.save_path + args.name)
        for part in glob.glob(args.save_path + args.name + "_part-[0-9]*"):
            shutil.rmtree(part)
    if not args.no_record_data:
        # create the output
        os.makedirs(args.save_path + args.name, exist_ok=True)

    if args.num_cpu == 1:
        env_thread(args, 0, partition=False, use_ppo2=args.run_ppo2)
    else:
        # try and divide into multiple processes, with an environment each
        try:
            jobs = []
            for i in range(args.num_cpu):
                process = multiprocessing.Process(target=env_thread,
                                                  args=(args, i, True,
                                                        args.run_ppo2))
                jobs.append(process)

            for j in jobs:
                j.start()

            try:
                for j in jobs:
                    j.join()
            except Exception as e:
                printRed("Error: unable to join thread")
                raise e

        except Exception as e:
            printRed("Error: unable to start thread")
            raise e

    if not args.no_record_data and args.num_cpu > 1:
        # sleep 1 second, to avoid concurrency issues from multiprocessing (e.g., files still being written)
        time.sleep(1)
        # get all the parts
        file_parts = sorted(glob.glob(args.save_path + args.name +
                                      "_part-[0-9]*"),
                            key=lambda a: int(a.split("-")[-1]))

        # move the config files from any part, as they are identical across parts
        os.rename(file_parts[0] + "/dataset_config.json",
                  args.save_path + args.name + "/dataset_config.json")
        os.rename(file_parts[0] + "/env_globals.json",
                  args.save_path + args.name + "/env_globals.json")

        ground_truth = None
        preprocessed_data = None

        # used to convert the part record_id to the fused record_id
        record_id = 0
        for part in file_parts:
            # sort the record names alphabetically, then numerically
            records = sorted(glob.glob(part + "/record_[0-9]*"),
                             key=lambda a: int(a.split("_")[-1]))

            record_id_start = record_id
            for record in records:
                os.renames(
                    record, args.save_path + args.name +
                    "/record_{:03d}".format(record_id))
                record_id += 1

            # fuse the npz files together, in the right order
            if ground_truth is None:
                # init
                ground_truth = {}
                preprocessed_data = {}
                ground_truth_load = np.load(part + "/ground_truth.npz")
                preprocessed_data_load = np.load(part +
                                                 "/preprocessed_data.npz")

                for arr in ground_truth_load.files:
                    if arr == "images_path":
                        ground_truth[arr] = np.array([
                            convertImagePath(args, path, record_id_start)
                            for path in ground_truth_load[arr]
                        ])
                    else:
                        ground_truth[arr] = ground_truth_load[arr]
                for arr in preprocessed_data_load.files:
                    preprocessed_data[arr] = preprocessed_data_load[arr]

            else:
                ground_truth_load = np.load(part + "/ground_truth.npz")
                preprocessed_data_load = np.load(part +
                                                 "/preprocessed_data.npz")

                for arr in ground_truth_load.files:
                    if arr == "images_path":
                        sanitised_paths = np.array([
                            convertImagePath(args, path, record_id_start)
                            for path in ground_truth_load[arr]
                        ])
                        ground_truth[arr] = np.concatenate(
                            (ground_truth[arr], sanitised_paths))
                    else:
                        ground_truth[arr] = np.concatenate(
                            (ground_truth[arr], ground_truth_load[arr]))
                for arr in preprocessed_data_load.files:
                    preprocessed_data[arr] = np.concatenate(
                        (preprocessed_data[arr], preprocessed_data_load[arr]))

            # remove the current part folder
            shutil.rmtree(part)

        # save the fused outputs
        np.savez(args.save_path + args.name + "/ground_truth.npz",
                 **ground_truth)
        np.savez(args.save_path + args.name + "/preprocessed_data.npz",
                 **preprocessed_data)

    if args.reward_dist:
        rewards, counts = np.unique(
            np.load(args.save_path + args.name +
                    "/preprocessed_data.npz")['rewards'],
            return_counts=True)
        counts = ["{:.2f}%".format(val * 100) for val in counts / np.sum(counts)]
        print("reward distribution:")
        for reward, count in zip(rewards, counts):
            print(" ", reward, count)
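
# Hedged sketch (not part of the original example): once the parts are fused above, the
# dataset folder contains ground_truth.npz and preprocessed_data.npz. This standalone check
# prints the number of recorded frames and the shape of every stored array, under the
# assumption that "images_path" lists one entry per frame.
import numpy as np


def inspect_fused_dataset(dataset_folder):
    """
    :param dataset_folder: (str) e.g. args.save_path + args.name from the script above
    """
    ground_truth = np.load(dataset_folder + "/ground_truth.npz")
    preprocessed = np.load(dataset_folder + "/preprocessed_data.npz")
    print("{} frames recorded".format(len(ground_truth["images_path"])))
    for name, data in [("ground_truth", ground_truth), ("preprocessed_data", preprocessed)]:
        for arr in data.files:
            print("  {}/{}: shape {}".format(name, arr, data[arr].shape))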