def train(self):
    start_time = time.time()
    for itr in range(self.start_itr, self.n_itr):
        itr_start_time = time.time()
        with logger.prefix('itr #%d | ' % itr):
            logger.log("Obtaining samples...")
            sd = self.obtain_samples(itr)
            if self.alter_sd_fn is not None:
                self.alter_sd_fn(sd, *self.alter_sd_args)
            logger.log("Processing samples...")
            self.process_samples(itr, sd)
            logger.log("Logging diagnostics...")
            self.log_diagnostics(sd['stats'])
            logger.log("Optimizing policy...")
            self.optimize_policy(itr, sd)
            logger.record_tabular('Time', time.time() - start_time)
            logger.record_tabular('ItrTime', time.time() - itr_start_time)
            logger.dump_tabular(with_prefix=False)
            if itr % self.plot_every == 0 and self.plot and itr > self.plot_itr_threshold:
                rollout(self.policy, self.env_obj, self.max_path_length, plot=True)
            if itr % self.save_step == 0 and logger.get_snapshot_dir() is not None:
                self.save(logger.get_snapshot_dir() + '/snapshots', itr)
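# Hedged usage sketch (not part of the library): train() only touches the hook
# methods and attributes used above, so a subclass mainly needs to provide
# obtain_samples / process_samples / log_diagnostics / optimize_policy / save.
# The class and base-class names below are hypothetical and only illustrate
# that interface:
#
# class MyBatchAlgo(BatchRLAlgorithm):   # assumed base class exposing train()
#     def obtain_samples(self, itr):
#         paths = [rollout(self.policy, self.env_obj, self.max_path_length)]
#         return {'paths': paths, 'stats': {}}
#
#     def process_samples(self, itr, sd):
#         sd['stats']['NumPaths'] = len(sd['paths'])
#
#     def log_diagnostics(self, stats):
#         self.print_diagnostics(stats)
#
#     def optimize_policy(self, itr, sd):
#         pass  # e.g. one gradient step on the collected batch
#
#     def save(self, snapshot_dir, itr):
#         pass  # e.g. serialize self.policy to snapshot_dir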
def optimize_policy(self, itr, samples_data):
    prev_param = get_numpy(self._target.get_params_flat())
    self.policy.zero_grad()
    loss_before = self.loss(samples_data)
    loss_before.backward()
    flat_g = self.policy.get_params_flat()
    loss_before = get_numpy(loss_before).item()
    Hx = self._hvp_approach.build_eval(samples_data)
    descent_direction = krylov.cg(Hx, flat_g, cg_iters=self._cg_iters)
    initial_step_size = np.sqrt(
        2.0 * self._max_constraint_val *
        (1. / (descent_direction.dot(Hx(descent_direction)) + 1e-8)))
    if np.isnan(initial_step_size):
        initial_step_size = 1.
    flat_descent_step = initial_step_size * descent_direction
    logger.log("descent direction computed")

    n_iter = 0
    for n_iter, ratio in enumerate(self._backtrack_ratio ** np.arange(
            self._max_backtracks)):
        cur_step = ratio * flat_descent_step
        cur_param = prev_param - cur_step
        self._target.set_params_flat(from_numpy(cur_param))
        loss, constraint_val = self.compute_loss_terms(samples_data)
        if self._debug_nan and np.isnan(constraint_val):
            import ipdb
            ipdb.set_trace()
        if loss < loss_before and constraint_val <= self._max_constraint_val:
            break

    if (np.isnan(loss) or np.isnan(constraint_val) or loss >= loss_before
            or constraint_val >= self._max_constraint_val) \
            and not self._accept_violation:
        logger.log("Line search condition violated. Rejecting the step!")
        if np.isnan(loss):
            logger.log("Violated because loss is NaN")
        if np.isnan(constraint_val):
            logger.log("Violated because constraint %s is NaN"
                       % self._constraint_name)
        if loss >= loss_before:
            logger.log("Violated because loss not improving")
        if constraint_val >= self._max_constraint_val:
            logger.log("Violated because constraint %s is violated"
                       % self._constraint_name)
        self._target.set_params_flat(from_numpy(prev_param))
    logger.log("backtrack iters: %d" % n_iter)
    logger.log("computing loss after")
    logger.log("optimization finished")
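# Illustrative standalone sketch (not library code): the initial step size
# above comes from solving (1/2) * (beta * d)^T H (beta * d) = delta for beta,
# i.e. beta = sqrt(2 * delta / (d^T H d)), so the full step saturates the KL
# constraint under the quadratic approximation. H, d, and delta below are
# made-up stand-ins for the Hessian-vector product, the CG solution, and
# _max_constraint_val:
import numpy as np

H = np.array([[2.0, 0.0],
              [0.0, 4.0]])    # stand-in curvature matrix
d = np.array([1.0, 0.5])      # stand-in descent direction from CG
delta = 0.01                  # stand-in max constraint value
beta = np.sqrt(2.0 * delta / (d.dot(H.dot(d)) + 1e-8))
assert abs(0.5 * (beta * d).dot(H.dot(beta * d)) - delta) < 1e-6
# The backtracking loop then tries beta * d, backtrack_ratio * beta * d, ...
# until the loss improves and the constraint is satisfied.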
def optimize_policy(self, itr, samples_data):
    try_penalty = float(
        np.clip(self._penalty, self._min_penalty, self._max_penalty))
    penalty_scale_factor = None

    def gen_f_opt(penalty):
        def f(flat_params):
            self.policy.set_params_flat(from_numpy(flat_params))
            return self.get_opt_output(samples_data, penalty)
        return f

    cur_params = get_numpy(self.policy.get_params_flat().double())
    opt_params = cur_params

    # Save views of objs for efficiency
    samples_data['obs_flat_var'] = np_to_var(samples_data['obs_flat'])
    samples_data['action_dist_flat'] = samples_data['action_dist'].detach().reshape(
        (-1, samples_data['action_dist'].dim))
    samples_data['actions_flat'] = samples_data['actions'].view(
        -1, self.action_dim)
    samples_data['discount_adv_var'] = np_to_var(samples_data['discount_adv'])

    for penalty_itr in range(self._max_penalty_itr):
        logger.log('trying penalty=%.3f...' % try_penalty)
        itr_opt_params, _, _ = scipy.optimize.fmin_l_bfgs_b(
            func=gen_f_opt(try_penalty),
            x0=cur_params,
            maxiter=self._max_opt_itr)

        _, try_loss, try_constraint_val = self.compute_loss_terms(
            samples_data, try_penalty)
        try_loss = get_numpy(try_loss)[0]
        try_constraint_val = get_numpy(try_constraint_val)[0]
        logger.log('penalty %f => loss %f, %s %f' %
                   (try_penalty, try_loss, self._constraint_name,
                    try_constraint_val))

        if try_constraint_val < self._max_constraint_val or \
                (penalty_itr == self._max_penalty_itr - 1 and
                 opt_params is None):
            opt_params = itr_opt_params

        if not self._adapt_penalty:
            break

        # Decide scale factor on the first iteration, or if constraint
        # violation yields numerical error
        if penalty_scale_factor is None or np.isnan(try_constraint_val):
            # Increase penalty if constraint violated, or if constraint term
            # is NaN
            if try_constraint_val > self._max_constraint_val or np.isnan(
                    try_constraint_val):
                penalty_scale_factor = self._increase_penalty_factor
            else:
                # Otherwise (i.e. constraint satisfied), shrink penalty
                penalty_scale_factor = self._decrease_penalty_factor
                opt_params = itr_opt_params
        else:
            if penalty_scale_factor > 1 and \
                    try_constraint_val <= self._max_constraint_val:
                break
            elif penalty_scale_factor < 1 and \
                    try_constraint_val >= self._max_constraint_val:
                break
        try_penalty *= penalty_scale_factor
        try_penalty = float(
            np.clip(try_penalty, self._min_penalty, self._max_penalty))
        self._penalty = try_penalty

    self.policy.set_params_flat(from_numpy(opt_params))
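# Hedged illustration of the penalty search above (standalone sketch, not
# library code; the toy constraint function and factor values are made up).
# The loop scales the penalty up while the constraint is violated (or NaN) and
# down once it is satisfied, stopping when it crosses the threshold from the
# appropriate direction:
import numpy as np

def adapt_penalty_sketch(constraint_of_penalty, max_constraint_val=0.01,
                         penalty=1.0, increase_factor=2.0, decrease_factor=0.5,
                         min_penalty=1e-2, max_penalty=1e6, max_itr=10):
    scale = None
    for _ in range(max_itr):
        c = constraint_of_penalty(penalty)
        if scale is None or np.isnan(c):
            # First iteration (or numerical error): pick a direction.
            scale = increase_factor if (c > max_constraint_val or np.isnan(c)) \
                else decrease_factor
        elif scale > 1 and c <= max_constraint_val:
            break  # was increasing, constraint now satisfied
        elif scale < 1 and c >= max_constraint_val:
            break  # was decreasing, constraint now violated
        penalty = float(np.clip(penalty * scale, min_penalty, max_penalty))
    return penalty

# e.g. with a toy constraint that shrinks as the penalty grows:
# adapt_penalty_sketch(lambda p: 0.05 / p)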
def print_diagnostics(self, stats):
    for k in sorted(stats.keys()):
        logger.log('%s: %f' % (k, stats[k]))
def run_experiment(
        method_call,
        mode='local',
        exp_prefix='default',
        seed=None,
        variant=None,
        exp_id=0,
        unique_id=None,
        prepend_date_to_exp_prefix=True,
        use_gpu=False,
        snapshot_mode='last',
        snapshot_gap=1,
        n_parallel=0,
        base_log_dir=None,
        sync_interval=180,
        local_input_dir_to_mount_point_dict=None,  # TODO(vitchyr): test this
):
    """
    Usage:
    ```
    def foo(variant):
        x = variant['x']
        y = variant['y']
        logger.log("sum", x+y)

    variant = {
        'x': 4,
        'y': 3,
    }
    run_experiment(foo, variant=variant, exp_prefix="my-experiment")
    ```
    Results are saved to
    `base_log_dir/<date>-my-experiment/<date>-my-experiment-<unique-id>`
    By default, the base_log_dir is determined by
    `config.LOCAL_LOG_DIR/`

    :param method_call: a function that takes in a dictionary as argument
    :param mode: 'local', 'local_docker', or 'ec2'
    :param exp_prefix: name of experiment
    :param seed: Seed for this specific trial.
    :param variant: Dictionary
    :param exp_id: One experiment = one variant setting + multiple seeds
    :param unique_id: If not set, the unique id is generated.
    :param prepend_date_to_exp_prefix: If False, do not prepend the date to
        the experiment directory.
    :param use_gpu:
    :param snapshot_mode: See rllab.logger
    :param snapshot_gap: See rllab.logger
    :param n_parallel:
    :param base_log_dir: Will override the default base log directory.
    :param sync_interval: How often to sync s3 data (in seconds).
    :param local_input_dir_to_mount_point_dict: Dictionary for doodad.
    :return:
    """
    try:
        import doodad
        import doodad.mode
        import doodad.mount as mount
        from doodad.utils import REPO_DIR
    except ImportError:
        return run_experiment_old(
            method_call,
            exp_prefix=exp_prefix,
            seed=seed,
            variant=variant,
            time_it=True,
            mode=mode,
            exp_id=exp_id,
            unique_id=unique_id,
            prepend_date_to_exp_prefix=prepend_date_to_exp_prefix,
            use_gpu=use_gpu,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            n_parallel=n_parallel,
            base_log_dir=base_log_dir,
            periodic_sync_interval=sync_interval,
        )
    global ec2_okayed
    global gpu_ec2_okayed
    if local_input_dir_to_mount_point_dict is None:
        local_input_dir_to_mount_point_dict = {}
    else:
        raise NotImplementedError("TODO(vitchyr): Implement this")

    # Modify some of the inputs
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    for key, value in ppp.recursive_items(variant):
        # This check isn't really necessary, but it's to prevent myself from
        # forgetting to pass a variant through dot_map_dict_to_nested_dict.
        if "." in key:
            raise Exception(
                "Variants should not have periods in keys. Did you mean to "
                "convert {} into a nested dictionary?".format(key)
            )
    if unique_id is None:
        unique_id = str(uuid.uuid4())
    if prepend_date_to_exp_prefix:
        exp_prefix = time.strftime("%m-%d") + "-" + exp_prefix
    variant['seed'] = str(seed)
    variant['exp_id'] = str(exp_id)
    variant['unique_id'] = str(unique_id)
    logger.log("Variant:")
    logger.log(json.dumps(ppp.dict_to_safe_json(variant), indent=2))

    mode_str_to_doodad_mode = {
        'local': doodad.mode.Local(),
        'local_docker': doodad.mode.LocalDocker(
            image=config.DOODAD_DOCKER_IMAGE,
        ),
        'ec2': doodad.mode.EC2AutoconfigDocker(
            image=config.DOODAD_DOCKER_IMAGE,
            region='us-east-2',
            instance_type='c4.large',
            spot_price=0.03,
            s3_log_prefix=exp_prefix,
            s3_log_name="{}-id{}-s{}".format(exp_prefix, exp_id, seed),
        ),
    }

    if base_log_dir is None:
        base_log_dir = config.LOCAL_LOG_DIR
    output_mount_point = config.OUTPUT_DIR_FOR_DOODAD_TARGET

    mounts = [
        mount.MountLocal(local_dir=REPO_DIR, pythonpath=True),
    ]
    for code_dir in config.CODE_DIRS_TO_MOUNT:
        mounts.append(mount.MountLocal(local_dir=code_dir, pythonpath=True))
    for dir, mount_point in local_input_dir_to_mount_point_dict.items():
        mounts.append(mount.MountLocal(
            local_dir=dir,
            mount_point=mount_point,
            pythonpath=False,
        ))

    if mode != 'local':
        for non_code_mapping in config.DIR_AND_MOUNT_POINT_MAPPINGS:
            mounts.append(mount.MountLocal(**non_code_mapping))

    if mode == 'ec2':
        if not ec2_okayed and not query_yes_no(
                "EC2 costs money. Are you sure you want to run?"
        ):
            sys.exit(1)
        if not gpu_ec2_okayed and use_gpu:
            if not query_yes_no(
                    "EC2 is more expensive with GPUs. Confirm?"
            ):
                sys.exit(1)
            gpu_ec2_okayed = True
        ec2_okayed = True
        output_mount = mount.MountS3(
            s3_path='',
            mount_point=output_mount_point,
            output=True,
            sync_interval=sync_interval,
        )
        # This will be over-written by the snapshot dir, but I'm setting it
        # for good measure.
        base_log_dir_for_script = output_mount_point
        # The snapshot dir needs to be specified for S3 because S3 will
        # automatically create the experiment directory and sub-directory.
        snapshot_dir_for_script = output_mount_point
    elif mode == 'local':
        output_mount = mount.MountLocal(
            local_dir=base_log_dir,
            mount_point=None,  # For purely local mode, skip mounting.
            output=True,
        )
        base_log_dir_for_script = base_log_dir
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    else:
        output_mount = mount.MountLocal(
            local_dir=base_log_dir,
            mount_point=output_mount_point,
            output=True,
        )
        base_log_dir_for_script = output_mount_point
        # The snapshot dir will be automatically created
        snapshot_dir_for_script = None
    mounts.append(output_mount)

    repo = git.Repo(os.getcwd())
    code_diff = repo.git.diff(None)
    if len(code_diff) > 5000:
        logger.log("Git diff %d greater than 5000. Not saving diff."
                   % len(code_diff))
        code_diff = None
    run_experiment_kwargs = dict(
        exp_prefix=exp_prefix,
        variant=variant,
        exp_id=exp_id,
        seed=seed,
        use_gpu=use_gpu,
        snapshot_mode=snapshot_mode,
        snapshot_gap=snapshot_gap,
        code_diff=code_diff,
        commit_hash=repo.head.commit.hexsha,
        script_name=main.__file__,
        n_parallel=n_parallel,
        base_log_dir=base_log_dir_for_script,
    )
    doodad.launch_python(
        target=config.RUN_DOODAD_EXPERIMENT_SCRIPT_PATH,
        mode=mode_str_to_doodad_mode[mode],
        mount_points=mounts,
        args={
            'method_call': method_call,
            'output_dir': snapshot_dir_for_script,
            'run_experiment_kwargs': run_experiment_kwargs,
        },
        use_cloudpickle=True,
        fake_display=True if mode != 'local' else False,
    )
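# Hedged usage sketch (the experiment function and variant keys below are
# hypothetical): a typical sweep calls run_experiment once per (variant, seed)
# pair and lets it fill in seed / exp_id / unique_id and pick the doodad mode:
#
# def experiment(variant):
#     ...  # build env, policy, and algorithm from variant, then train
#
# for exp_id, lr in enumerate([1e-3, 1e-4]):
#     for seed in range(3):
#         run_experiment(
#             experiment,
#             mode='local',           # or 'local_docker' / 'ec2'
#             exp_prefix='lr-sweep',
#             variant={'lr': lr},
#             exp_id=exp_id,
#             seed=seed,
#             use_gpu=False,
#         )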
def run_experiment_old(
        task,
        exp_prefix='default',
        seed=None,
        variant=None,
        time_it=True,
        save_profile=False,
        profile_file='time_log.prof',
        mode='here',
        exp_id=0,
        unique_id=None,
        prepend_date_to_exp_prefix=True,
        use_gpu=False,
        snapshot_mode='last',
        snapshot_gap=1,
        n_parallel=0,
        base_log_dir=None,
        **run_experiment_lite_kwargs
):
    """
    Run a task via the rllab interface, i.e. serialize it and then run it via
    the run_experiment_lite script.

    This will soon be deprecated.

    :param task:
    :param exp_prefix:
    :param seed:
    :param variant:
    :param time_it: Add a "time" command to the python command?
    :param save_profile: Create a cProfile log?
    :param profile_file: Where to save the cProfile log.
    :param mode: 'here' will run the code in line, without any serialization.
        Other options include 'local', 'local_docker', and 'ec2'. See
        run_experiment_lite documentation to learn what those modes do.
    :param exp_id: Experiment ID. Should be unique across all experiments.
        Note that one experiment may correspond to multiple seeds.
    :param unique_id: Unique ID should be unique across all runs--even
        different seeds!
    :param prepend_date_to_exp_prefix: If True, prefix "month-day_" to
        exp_prefix
    :param run_experiment_lite_kwargs: kwargs to be passed to
        `run_experiment_lite`
    :return:
    """
    if seed is None:
        seed = random.randint(0, 100000)
    if variant is None:
        variant = {}
    if unique_id is None:
        unique_id = str(uuid.uuid4())
    if prepend_date_to_exp_prefix:
        exp_prefix = time.strftime("%m-%d") + "_" + exp_prefix
    variant['seed'] = str(seed)
    variant['exp_id'] = str(exp_id)
    variant['unique_id'] = str(unique_id)
    logger.log("Variant:")
    logger.log(json.dumps(ppp.dict_to_safe_json(variant), indent=2))

    command_words = []
    if time_it:
        command_words.append('time')
    command_words.append('python')
    if save_profile:
        command_words += ['-m cProfile -o', profile_file]

    repo = git.Repo(os.getcwd())
    diff_string = repo.git.diff(None)
    commit_hash = repo.head.commit.hexsha
    script_name = "tmp"
    if mode == 'here':
        log_dir, exp_name = create_log_dir(exp_prefix, exp_id, seed,
                                           base_log_dir)
        data = dict(
            log_dir=log_dir,
            exp_name=exp_name,
            mode=mode,
            variant=variant,
            exp_id=exp_id,
            exp_prefix=exp_prefix,
            seed=seed,
            use_gpu=use_gpu,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            diff_string=diff_string,
            commit_hash=commit_hash,
            n_parallel=n_parallel,
            base_log_dir=base_log_dir,
            script_name=script_name,
        )
        save_experiment_data(data, log_dir)

    if mode == 'here':
        run_experiment_here(
            task,
            exp_prefix=exp_prefix,
            variant=variant,
            exp_id=exp_id,
            seed=seed,
            use_gpu=use_gpu,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            code_diff=diff_string,
            commit_hash=commit_hash,
            script_name=script_name,
            n_parallel=n_parallel,
            base_log_dir=base_log_dir,
        )
    else:
        if mode == "ec2" and use_gpu:
            if not query_yes_no(
                    "EC2 is more expensive with GPUs. Confirm?"
            ):
                sys.exit(1)
        code_diff = (
            base64.b64encode(cloudpickle.dumps(diff_string)).decode("utf-8")
        )
        run_experiment_lite(
            task,
            snapshot_mode=snapshot_mode,
            snapshot_gap=snapshot_gap,
            exp_prefix=exp_prefix,
            variant=variant,
            seed=seed,
            use_cloudpickle=True,
            python_command=' '.join(command_words),
            mode=mode,
            use_gpu=use_gpu,
            script="railrl/scripts/run_experiment_lite.py",
            code_diff=code_diff,
            commit_hash=commit_hash,
            script_name=script_name,
            n_parallel=n_parallel,
            **run_experiment_lite_kwargs
        )
def print_diagnostics(self, stats):
    for k, v in stats.items():
        logger.log('%s: %f' % (k, v))