Example #1
 def train(self):
     start_time = time.time()
     for itr in range(self.start_itr, self.n_itr):
         itr_start_time = time.time()
         with logger.prefix('itr #%d | ' % itr):
             logger.log("Obtaining samples...")
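             # sd: the sampled batch for this iteration; after processing, sd['stats'] feeds the diagnostics below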
             sd = self.obtain_samples(itr)
             if self.alter_sd_fn is not None:
                 self.alter_sd_fn(sd, *self.alter_sd_args)
             logger.log("Processing samples...")
             self.process_samples(itr, sd)
             logger.log("Logging diagnostics...")
             self.log_diagnostics(sd['stats'])
             logger.log("Optimizing policy...")
             self.optimize_policy(itr, sd)
             logger.record_tabular('Time', time.time() - start_time)
             logger.record_tabular('ItrTime', time.time() - itr_start_time)
             logger.dump_tabular(with_prefix=False)
         if itr % self.plot_every == 0 and self.plot and itr > self.plot_itr_threshold:
             rollout(self.policy, self.env_obj, self.max_path_length, plot=True)
         if itr % self.save_step == 0 and logger.get_snapshot_dir() is not None:
             self.save(logger.get_snapshot_dir() + '/snapshots', itr)
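The loop above is a template: sampling, processing, diagnostics, optimization, and saving are delegated to methods a concrete trainer must supply. A minimal sketch of those hooks (only the method names come from the calls in `train()`; everything else here is illustrative):

```python
class MyTrainer:
    """Sketch of the interface train() expects; mix into / subclass the trainer above."""

    def obtain_samples(self, itr):
        # collect a batch of trajectories for iteration `itr` and return it as a dict
        raise NotImplementedError

    def process_samples(self, itr, sd):
        # post-process the batch, e.g. compute advantages and populate sd['stats']
        raise NotImplementedError

    def log_diagnostics(self, stats):
        raise NotImplementedError

    def optimize_policy(self, itr, sd):
        raise NotImplementedError

    def save(self, snapshot_dir, itr):
        raise NotImplementedError
```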
Example #2
    def optimize_policy(self, itr, samples_data):
        prev_param = get_numpy(self._target.get_params_flat())

        self.policy.zero_grad()
        loss_before = self.loss(samples_data)
        loss_before.backward()
        flat_g = self.policy.get_params_flat()
        loss_before = get_numpy(loss_before).item()

        Hx = self._hvp_approach.build_eval(samples_data)

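        # Approximately solve H x = g with conjugate gradient to obtain the natural-gradient direction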
        descent_direction = krylov.cg(Hx, flat_g, cg_iters=self._cg_iters)

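        # Largest step along the descent direction whose quadratic constraint estimate
        # 0.5 * step^T H step equals max_constraint_val (1e-8 guards against division by zero)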
        initial_step_size = np.sqrt(
            2.0 * self._max_constraint_val *
            (1. / (descent_direction.dot(Hx(descent_direction)) + 1e-8)))
        if np.isnan(initial_step_size):
            initial_step_size = 1.
        flat_descent_step = initial_step_size * descent_direction

        logger.log("descent direction computed")

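        # Backtracking line search: geometrically shrink the step until the loss improves and the constraint holds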
        n_iter = 0
        for n_iter, ratio in enumerate(self._backtrack_ratio**np.arange(
                self._max_backtracks)):
            cur_step = ratio * flat_descent_step
            cur_param = prev_param - cur_step
            self._target.set_params_flat(from_numpy(cur_param))
            loss, constraint_val = self.compute_loss_terms(samples_data)

            if self._debug_nan and np.isnan(constraint_val):
                import ipdb
                ipdb.set_trace()
            if loss < loss_before and constraint_val <= self._max_constraint_val:
                break
        if (np.isnan(loss) or np.isnan(constraint_val) or loss >= loss_before
                or constraint_val >= self._max_constraint_val
            ) and not self._accept_violation:
            logger.log("Line search condition violated. Rejecting the step!")
            if np.isnan(loss):
                logger.log("Violated because loss is NaN")
            if np.isnan(constraint_val):
                logger.log("Violated because constraint %s is NaN" %
                           self._constraint_name)
            if loss >= loss_before:
                logger.log("Violated because loss not improving")
            if constraint_val >= self._max_constraint_val:
                logger.log("Violated because constraint %s is violated" %
                           self._constraint_name)
            self._target.set_params_flat(from_numpy(prev_param))
        logger.log("backtrack iters: %d" % n_iter)
        logger.log("computing loss after")
        logger.log("optimization finished")
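The `initial_step_size` above is the standard constrained-step choice: along the conjugate-gradient direction, take the largest step whose quadratic constraint estimate equals the constraint budget. A small self-contained check of that rule (the toy `H`, `g`, and `delta` below are arbitrary, not taken from the code above):

```python
import numpy as np

# Toy check of the step-size rule: with d solving H d = g and
# beta = sqrt(2 * delta / (d^T H d)), the quadratic constraint estimate
# 0.5 * (beta d)^T H (beta d) comes out to exactly delta.
H = np.diag([2.0, 4.0])
g = np.array([1.0, 1.0])
d = np.linalg.solve(H, g)           # what krylov.cg approximates above
delta = 0.01                        # stands in for self._max_constraint_val
beta = np.sqrt(2.0 * delta / d.dot(H.dot(d)))
step = beta * d
print(0.5 * step.dot(H.dot(step)))  # -> 0.01
```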
Example #3
    def optimize_policy(self, itr, samples_data):
        try_penalty = float(
            np.clip(self._penalty, self._min_penalty, self._max_penalty))
        penalty_scale_factor = None

        def gen_f_opt(penalty):
            def f(flat_params):
                self.policy.set_params_flat(from_numpy(flat_params))
                return self.get_opt_output(samples_data, penalty)

            return f

        cur_params = get_numpy(self.policy.get_params_flat().double())
        opt_params = cur_params

        # Save views of objs for efficiency
        samples_data['obs_flat_var'] = np_to_var(samples_data['obs_flat'])
        samples_data['action_dist_flat'] = samples_data['action_dist'].detach().reshape(
            (-1, samples_data['action_dist'].dim))
        samples_data['actions_flat'] = samples_data['actions'].view(
            -1, self.action_dim)
        samples_data['discount_adv_var'] = np_to_var(
            samples_data['discount_adv'])

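        # Penalty search: fit the penalized objective with L-BFGS, then grow or shrink
        # the penalty until the constraint value crosses max_constraint_val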
        for penalty_itr in range(self._max_penalty_itr):
            logger.log('trying penalty=%.3f...' % try_penalty)

            itr_opt_params, _, _ = scipy.optimize.fmin_l_bfgs_b(
                func=gen_f_opt(try_penalty),
                x0=cur_params,
                maxiter=self._max_opt_itr)

            _, try_loss, try_constraint_val = self.compute_loss_terms(
                samples_data, try_penalty)
            try_loss = get_numpy(try_loss)[0]
            try_constraint_val = get_numpy(try_constraint_val)[0]

            logger.log('penalty %f => loss %f, %s %f' %
                       (try_penalty, try_loss, self._constraint_name,
                        try_constraint_val))

            if try_constraint_val < self._max_constraint_val or \
                    (penalty_itr == self._max_penalty_itr - 1 and opt_params is None):
                opt_params = itr_opt_params

            if not self._adapt_penalty:
                break

            # Decide scale factor on the first iteration, or if constraint violation yields numerical error
            if penalty_scale_factor is None or np.isnan(try_constraint_val):
                # Increase penalty if constraint violated, or if constraint term is NAN
                if try_constraint_val > self._max_constraint_val or np.isnan(
                        try_constraint_val):
                    penalty_scale_factor = self._increase_penalty_factor
                else:
                    # Otherwise (i.e. constraint satisfied), shrink penalty
                    penalty_scale_factor = self._decrease_penalty_factor
                    opt_params = itr_opt_params
            else:
                if penalty_scale_factor > 1 and \
                        try_constraint_val <= self._max_constraint_val:
                    break
                elif penalty_scale_factor < 1 and \
                        try_constraint_val >= self._max_constraint_val:
                    break
            try_penalty *= penalty_scale_factor
            try_penalty = float(
                np.clip(try_penalty, self._min_penalty, self._max_penalty))
            self._penalty = try_penalty

        self.policy.set_params_flat(from_numpy(opt_params))
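The adaptation logic above (pick a scale factor on the first try, then stop once the constraint value crosses the threshold from the appropriate side) is easier to see in isolation. A stripped-down sketch of just that pattern, with a toy stand-in for the L-BFGS fit (every name and constant below is illustrative):

```python
import numpy as np

def solve_penalized(penalty):
    # toy stand-in for the optimizer: a larger penalty yields a smaller constraint value
    return 1.0 / (1.0 + penalty)

penalty, max_constraint = 1.0, 0.1
scale = None
for _ in range(20):
    constraint_val = solve_penalized(penalty)
    print('penalty %.2f -> constraint %.3f' % (penalty, constraint_val))
    if scale is None:
        # first iteration: grow the penalty if the constraint is violated, else shrink it
        scale = 2.0 if constraint_val > max_constraint else 0.5
    elif (scale > 1 and constraint_val <= max_constraint) or \
         (scale < 1 and constraint_val >= max_constraint):
        break  # the constraint boundary was crossed; keep the last accepted parameters
    penalty = float(np.clip(penalty * scale, 1e-2, 1e6))
```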
Example #4
 def print_diagnostics(self, stats):
     for k in sorted(stats.keys()):
         logger.log('%s: %f' % (k, stats[k]))
Example #5
def run_experiment(
        method_call,
        mode='local',
        exp_prefix='default',
        seed=None,
        variant=None,
        exp_id=0,
        unique_id=None,
        prepend_date_to_exp_prefix=True,
        use_gpu=False,
        snapshot_mode='last',
        snapshot_gap=1,
        n_parallel=0,
        base_log_dir=None,
        sync_interval=180,
        local_input_dir_to_mount_point_dict=None,  # TODO(vitchyr): test this
):
    """
    Usage:

    ```
    def foo(variant):
        x = variant['x']
        y = variant['y']
        logger.log("sum: %f" % (x + y))

    variant = {
        'x': 4,
        'y': 3,
    }
    run_experiment(foo, variant=variant, exp_prefix="my-experiment")
    ```

    Results are saved to
    `base_log_dir/<date>-my-experiment/<date>-my-experiment-<unique-id>`

    By default, the base_log_dir is `config.LOCAL_LOG_DIR`.

    :param method_call: a function that takes in a dictionary as argument
    :param mode: 'local', 'local_docker', or 'ec2'
    :param exp_prefix: name of experiment
    :param seed: Seed for this specific trial.
    :param variant: Dictionary
    :param exp_id: One experiment = one variant setting + multiple seeds
    :param unique_id: If not set, the unique id is generated.
    :param prepend_date_to_exp_prefix: If False, do not prepend the date to
    the experiment directory.
    :param use_gpu: Whether to run on a GPU.
    :param snapshot_mode: See rllab.logger
    :param snapshot_gap: See rllab.logger
    :param n_parallel: Number of parallel workers to use.
    :param base_log_dir: Overrides the default base log directory (config.LOCAL_LOG_DIR).
    :param sync_interval: How often to sync s3 data (in seconds).
    :param local_input_dir_to_mount_point_dict: Dictionary for doodad.
    :return:
    """
    try:
        import doodad
        import doodad.mode
        import doodad.mount as mount
        from doodad.utils import REPO_DIR
    except ImportError:
        return run_experiment_old(
            method_call,
            exp_prefix=exp_prefix,
            seed=seed,
            variant=variant,
            time_it=True,
            mode=mode,
            exp_id=exp_id,
            unique_id=unique_id,
            prepend_date_to_exp_prefix=prepend_date_to_exp_prefix,
            use_gpu=use_gpu,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            n_parallel=n_parallel,
            base_log_dir=base_log_dir,
            periodic_sync_interval=sync_interval,
        )
    global ec2_okayed
    global gpu_ec2_okayed
    if local_input_dir_to_mount_point_dict is None:
        local_input_dir_to_mount_point_dict = {}
    else:
        raise NotImplementedError("TODO(vitchyr): Implement this")
    # Modify some of the inputs
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    for key, value in ppp.recursive_items(variant):
        # This check isn't really necessary, but it's to prevent myself from
        # forgetting to pass a variant through dot_map_dict_to_nested_dict.
        if "." in key:
            raise Exception(
                "Variants should not have periods in keys. Did you mean to "
                "convert {} into a nested dictionary?".format(key)
            )
    if unique_id is None:
        unique_id = str(uuid.uuid4())
    if prepend_date_to_exp_prefix:
        exp_prefix = time.strftime("%m-%d") + "-" + exp_prefix
    variant['seed'] = str(seed)
    variant['exp_id'] = str(exp_id)
    variant['unique_id'] = str(unique_id)
    logger.log("Variant:")
    logger.log(json.dumps(ppp.dict_to_safe_json(variant), indent=2))

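    # Map the user-facing mode string onto the corresponding doodad launch mode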
    mode_str_to_doodad_mode = {
        'local': doodad.mode.Local(),
        'local_docker': doodad.mode.LocalDocker(
            image=config.DOODAD_DOCKER_IMAGE,
        ),
        'ec2': doodad.mode.EC2AutoconfigDocker(
            image=config.DOODAD_DOCKER_IMAGE,
            region='us-east-2',
            instance_type='c4.large',
            spot_price=0.03,
            s3_log_prefix=exp_prefix,
            s3_log_name="{}-id{}-s{}".format(exp_prefix, exp_id, seed),
        ),
    }

    if base_log_dir is None:
        base_log_dir = config.LOCAL_LOG_DIR
    output_mount_point = config.OUTPUT_DIR_FOR_DOODAD_TARGET
    mounts = [
        mount.MountLocal(local_dir=REPO_DIR, pythonpath=True),
    ]
    for code_dir in config.CODE_DIRS_TO_MOUNT:
        mounts.append(mount.MountLocal(local_dir=code_dir, pythonpath=True))
    for local_dir, mount_point in local_input_dir_to_mount_point_dict.items():
        mounts.append(mount.MountLocal(
            local_dir=local_dir,
            mount_point=mount_point,
            pythonpath=False,
        ))

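    # Non-code mounts (config.DIR_AND_MOUNT_POINT_MAPPINGS) are only needed when not running purely locally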
    if mode != 'local':
        for non_code_mapping in config.DIR_AND_MOUNT_POINT_MAPPINGS:
            mounts.append(mount.MountLocal(**non_code_mapping))

    if mode == 'ec2':
        if not ec2_okayed and not query_yes_no(
                "EC2 costs money. Are you sure you want to run?"
        ):
            sys.exit(1)
        if not gpu_ec2_okayed and use_gpu:
            if not query_yes_no(
                    "EC2 is more expensive with GPUs. Confirm?"
            ):
                sys.exit(1)
            gpu_ec2_okayed = True
        ec2_okayed = True
        output_mount = mount.MountS3(
            s3_path='',
            mount_point=output_mount_point,
            output=True,
            sync_interval=sync_interval,
        )
        # This will be overwritten by the snapshot dir, but I'm setting it for
        # good measure.
        base_log_dir_for_script = output_mount_point
        # The snapshot dir needs to be specified for S3 because S3 will
        # automatically create the experiment directory and sub-directory.
        snapshot_dir_for_script = output_mount_point
    elif mode == 'local':
        output_mount = mount.MountLocal(
            local_dir=base_log_dir,
            mount_point=None,  # For purely local mode, skip mounting.
            output=True,
        )
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    else:
        output_mount = mount.MountLocal(
            local_dir=base_log_dir,
            mount_point=output_mount_point,
            output=True,
        )
        base_log_dir_for_script = output_mount_point
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    mounts.append(output_mount)

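    # Capture the current git diff and commit hash so the exact code state is stored with the run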
    repo = git.Repo(os.getcwd())
    code_diff = repo.git.diff(None)
    if len(code_diff) > 5000:
        logger.log("Git diff length %d exceeds 5000 characters. Not saving diff." % len(code_diff))
        code_diff = None
    run_experiment_kwargs = dict(
        exp_prefix=exp_prefix,
        variant=variant,
        exp_id=exp_id,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        code_diff=code_diff,
        commit_hash=repo.head.commit.hexsha,
        script_name=main.__file__,
        n_parallel=n_parallel,
        base_log_dir=base_log_dir_for_script,
    )
    doodad.launch_python(
        target=config.RUN_DOODAD_EXPERIMENT_SCRIPT_PATH,
        mode=mode_str_to_doodad_mode[mode],
        mount_points=mounts,
        args={
            'method_call': method_call,
            'output_dir': snapshot_dir_for_script,
            'run_experiment_kwargs': run_experiment_kwargs,
        },
        use_cloudpickle=True,
        fake_display=(mode != 'local'),
    )
Example #6
def run_experiment_old(
        task,
        exp_prefix='default',
        seed=None,
        variant=None,
        time_it=True,
        save_profile=False,
        profile_file='time_log.prof',
        mode='here',
        exp_id=0,
        unique_id=None,
        prepend_date_to_exp_prefix=True,
        use_gpu=False,
        snapshot_mode='last',
        snapshot_gap=1,
        n_parallel=0,
        base_log_dir=None,
        **run_experiment_lite_kwargs
):
    """
    Run a task via the rllab interface, i.e. serialize it and then run it via
    the run_experiment_lite script.

    This will soon be deprecated.

    :param task: A function that takes the variant dictionary as its argument.
    :param exp_prefix: Name of the experiment.
    :param seed: Seed for this specific trial.
    :param variant: Dictionary of experiment parameters.
    :param time_it: Add a "time" command to the python command?
    :param save_profile: Create a cProfile log?
    :param profile_file: Where to save the cProfile log.
    :param mode: 'here' runs the code inline, without any serialization.
    Other options include 'local', 'local_docker', and 'ec2'. See the
    run_experiment_lite documentation to learn what those modes do.
    :param exp_id: Experiment ID. Should be unique across all
    experiments. Note that one experiment may correspond to multiple seeds.
    :param unique_id: Unique ID should be unique across all runs--even different
    seeds!
    :param prepend_date_to_exp_prefix: If True, prefix "month-day_" to
    exp_prefix
    :param run_experiment_lite_kwargs: kwargs to be passed to
    `run_experiment_lite`
    :return:
    """
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    if unique_id is None:
        unique_id = str(uuid.uuid4())
    if prepend_date_to_exp_prefix:
        exp_prefix = time.strftime("%m-%d") + "_" + exp_prefix
    variant['seed'] = str(seed)
    variant['exp_id'] = str(exp_id)
    variant['unique_id'] = str(unique_id)
    logger.log("Variant:")
    logger.log(json.dumps(ppp.dict_to_safe_json(variant), indent=2))
    command_words = []
    if time_it:
        command_words.append('time')
    command_words.append('python')
    if save_profile:
        command_words += ['-m cProfile -o', profile_file]
    repo = git.Repo(os.getcwd())
    diff_string = repo.git.diff(None)
    commit_hash = repo.head.commit.hexsha
    script_name = "tmp"
    if mode == 'here':
        log_dir, exp_name = create_log_dir(exp_prefix, exp_id, seed,
                                           base_log_dir)
        data = dict(
            log_dir=log_dir,
            exp_name=exp_name,
            mode=mode,
            variant=variant,
            exp_id=exp_id,
            exp_prefix=exp_prefix,
            seed=seed,
            use_gpu=use_gpu,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            diff_string=diff_string,
            commit_hash=commit_hash,
            n_parallel=n_parallel,
            base_log_dir=base_log_dir,
            script_name=script_name,
        )
        save_experiment_data(data, log_dir)
        run_experiment_here(
            task,
            exp_prefix=exp_prefix,
            variant=variant,
            exp_id=exp_id,
            seed=seed,
            use_gpu=use_gpu,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            code_diff=diff_string,
            commit_hash=commit_hash,
            script_name=script_name,
            n_parallel=n_parallel,
            base_log_dir=base_log_dir,
        )
    else:
        if mode == "ec2" and use_gpu:
            if not query_yes_no(
                    "EC2 is more expensive with GPUs. Confirm?"
            ):
                sys.exit(1)
        code_diff = (
            base64.b64encode(cloudpickle.dumps(diff_string)).decode("utf-8")
        )
        run_experiment_lite(
            task,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            exp_prefix=exp_prefix,
            variant=variant,
            seed=seed,
            use_cloudpickle=True,
            python_command=' '.join(command_words),
            mode=mode,
            use_gpu=use_gpu,
            script="railrl/scripts/run_experiment_lite.py",
            code_diff=code_diff,
            commit_hash=commit_hash,
            script_name=script_name,
            n_parallel=n_parallel,
            **run_experiment_lite_kwargs
        )
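For reference, a minimal invocation of the legacy path above in 'here' mode (the task signature mirrors the variant-consuming function in the `run_experiment` docstring; the variant contents below are arbitrary):

```python
def task(variant):
    # run_experiment_old copies the seed into the variant before running the task
    print("running with seed", variant['seed'])

run_experiment_old(
    task,
    exp_prefix='debug',
    variant={'x': 1},
    mode='here',
)
```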
Example #7
 def print_diagnostics(self, stats):
     for k, v in stats.items():
         logger.log('%s: %f' % (k, v))