Example #1
# assumes module-level imports (not shown in this excerpt) of pickle and the
# project's ps_helpers and bayesian_IRL modules
def obtain_BIRL_summary(mdp_class,
                        data_loc,
                        mdp_parameters,
                        BIRL_params,
                        step_cost_flag,
                        visualize_history_priors=False,
                        visualize_summary=False):
    # load a previously cached BIRL summary for this evaluation function, if one exists
    try:
        with open(
                'models/' + data_loc +
                '/BIRL_summary_{}.pickle'.format(BIRL_params['eval_fn']),
                'rb') as f:
            bayesian_IRL_summary, wt_candidates, history_priors = pickle.load(
                f)
    # no cached summary found (or loading failed); compute one from scratch and cache it
    except:
        wt_candidates = ps_helpers.discretize_wt_candidates(
            data_loc,
            mdp_parameters['weights'],
            mdp_parameters['weights_lb'],
            mdp_parameters['weights_ub'],
            step_cost_flag,
            n_wt_partitions=BIRL_params['n_wt_partitions'],
            iter_idx=BIRL_params['iter_idx'])
        wt_vi_traj_candidates = ps_helpers.obtain_env_policies(
            mdp_class, data_loc, wt_candidates, mdp_parameters, 'BIRL')

        bayesian_IRL_summary, wt_candidates, history_priors = bayesian_IRL.obtain_summary(
            BIRL_params['n_demonstrations'], mdp_parameters['weights'],
            wt_candidates, wt_vi_traj_candidates, BIRL_params['eval_fn'])

        with open(
                'models/' + data_loc +
                '/BIRL_summary_{}.pickle'.format(BIRL_params['eval_fn']),
                'wb') as f:
            pickle.dump((bayesian_IRL_summary, wt_candidates, history_priors),
                        f)

    if visualize_history_priors or visualize_summary:
        bayesian_IRL.visualize_summary(
            bayesian_IRL_summary,
            wt_candidates,
            history_priors,
            visualize_summary=visualize_summary,
            visualize_history_priors=visualize_history_priors)

    return bayesian_IRL_summary, wt_candidates, history_priors
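A minimal sketch of how obtain_BIRL_summary might be invoked; only the dictionary key names come from the function body above, while the mdp_class string, data_loc, and all values are hypothetical placeholders:

BIRL_params = {'eval_fn': 'approx_posterior',   # hypothetical evaluation-function name
               'n_wt_partitions': 2,
               'iter_idx': None,
               'n_demonstrations': 5}
summary, wt_candidates, history_priors = obtain_BIRL_summary(
    'augmented_taxi',    # hypothetical MDP class
    'augmented_taxi',    # data_loc: pickles are cached under models/<data_loc>/
    mdp_parameters,      # dict providing 'weights', 'weights_lb', 'weights_ub', ...
    BIRL_params,
    step_cost_flag=True,
    visualize_summary=True)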
Example #2
def obtain_BEC_summary(mdp_class, data_loc, mdp_parameters, weights, step_cost_flag, summary_type, summary_variant, n_train_demos, BEC_depth=1, visualize_summary=False):
    # load a previously cached BEC summary if one exists
    try:
        with open('models/' + data_loc + '/BEC_summary.pickle', 'rb') as f:
            BEC_summary = pickle.load(f)
    # no cached summary found (or loading failed); recompute the ground-truth policies,
    # constraints, and summary, caching each intermediate result along the way
    except:
        wt_vi_traj_candidates = ps_helpers.obtain_env_policies(mdp_class, data_loc, np.expand_dims(weights, axis=0),
                                                               mdp_parameters, 'ground_truth')
        # load cached base constraints if available, otherwise extract them
        try:
            with open('models/' + data_loc + '/base_constraints.pickle', 'rb') as f:
                policy_constraints, min_subset_constraints_record, env_record, traj_record = pickle.load(f)
        except:
            if summary_type == 'demo':
                # a) use optimal trajectories from starting states to extract constraints
                opt_trajs = []
                for wt_vi_traj_candidate in wt_vi_traj_candidates:
                    opt_trajs.append(wt_vi_traj_candidate[0][2])
                policy_constraints, min_subset_constraints_record, env_record, traj_record = BEC.extract_constraints(wt_vi_traj_candidates, weights, step_cost_flag, BEC_depth=BEC_depth, trajectories=opt_trajs, print_flag=True)
            else:
                # b) use full policy to extract constraints
                policy_constraints, min_subset_constraints_record, env_record, traj_record = BEC.extract_constraints(wt_vi_traj_candidates, weights, step_cost_flag, print_flag=True)
            with open('models/' + data_loc + '/base_constraints.pickle', 'wb') as f:
                pickle.dump((policy_constraints, min_subset_constraints_record, env_record, traj_record), f)

        # load cached BEC constraints if available, otherwise extract them
        try:
            with open('models/' + data_loc + '/BEC_constraints.pickle', 'rb') as f:
                min_BEC_constraints, BEC_lengths_record = pickle.load(f)
        except:
            min_BEC_constraints, BEC_lengths_record = BEC.extract_BEC_constraints(policy_constraints, min_subset_constraints_record, weights, step_cost_flag)

            with open('models/' + data_loc + '/BEC_constraints.pickle', 'wb') as f:
                pickle.dump((min_BEC_constraints, BEC_lengths_record), f)

        # load the cached BEC summary if available, otherwise generate it
        try:
            with open('models/' + data_loc + '/BEC_summary.pickle', 'rb') as f:
                BEC_summary = pickle.load(f)
        except:
            BEC_summary = BEC.obtain_summary(summary_variant, wt_vi_traj_candidates, min_BEC_constraints, BEC_lengths_record, min_subset_constraints_record, env_record, traj_record, weights, step_cost_flag, n_train_demos=n_train_demos)
            with open('models/' + data_loc + '/BEC_summary.pickle', 'wb') as f:
                pickle.dump(BEC_summary, f)

    if visualize_summary:
        BEC.visualize_summary(BEC_summary, weights, step_cost_flag)

    return BEC_summary
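Every try/except block above follows the same load-from-pickle-or-compute-and-cache pattern. A small generic helper (not part of the original code) makes that pattern explicit:

import pickle

def load_or_compute(path, compute_fn):
    # return the cached result if the pickle exists, otherwise compute and cache it
    try:
        with open(path, 'rb') as f:
            return pickle.load(f)
    except (FileNotFoundError, EOFError):
        result = compute_fn()
        with open(path, 'wb') as f:
            pickle.dump(result, f)
        return result

For example, the BEC_constraints step could be expressed as load_or_compute('models/' + data_loc + '/BEC_constraints.pickle', lambda: BEC.extract_BEC_constraints(policy_constraints, min_subset_constraints_record, weights, step_cost_flag)).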
Example #3
def obtain_test_environments(mdp_class, data_loc, mdp_parameters, weights, BEC_params, step_cost_flag, summary=None, visualize_test_env=False):
    '''
    Summary: Correlate the difficulty of a test environment with the generalized area of the BEC region obtained by the
    corresponding optimal demonstration. Return the desired number and difficulty of test environments (to be given
    to the human to test their understanding of the agent's policy).
    '''
    # use generalized area of the BEC region to select test environments
    try:
        with open('models/' + data_loc + '/test_environments.pickle', 'rb') as f:
            test_wt_vi_traj_tuples, test_BEC_lengths, test_BEC_constraints = pickle.load(f)

    except:
        wt_vi_traj_candidates = ps_helpers.obtain_env_policies(mdp_class, data_loc, np.expand_dims(weights, axis=0), mdp_parameters, 'ground_truth')

        try:
            with open('models/' + data_loc + '/base_constraints.pickle', 'rb') as f:
                policy_constraints, min_subset_constraints_record, env_record, traj_record = pickle.load(f)
        except:
            # use the summary type passed in via BEC_params (the original line referenced a
            # module-level params object that is not defined in this excerpt)
            if BEC_params['summary_type'] == 'demo':
                # a) use optimal trajectories from starting states to extract constraints
                opt_trajs = []
                for wt_vi_traj_candidate in wt_vi_traj_candidates:
                    opt_trajs.append(wt_vi_traj_candidate[0][2])
                # BEC_depth is not defined anywhere in this function; fall back to an
                # (assumed) 'depth' entry in BEC_params, defaulting to 1
                policy_constraints, min_subset_constraints_record, env_record, traj_record = BEC.extract_constraints(wt_vi_traj_candidates, weights, step_cost_flag, BEC_depth=BEC_params.get('depth', 1), trajectories=opt_trajs, print_flag=True)
            else:
                # b) use full policy to extract constraints
                policy_constraints, min_subset_constraints_record, env_record, traj_record = BEC.extract_constraints(wt_vi_traj_candidates, weights, step_cost_flag, print_flag=True)
            with open('models/' + data_loc + '/base_constraints.pickle', 'wb') as f:
                pickle.dump((policy_constraints, min_subset_constraints_record, env_record, traj_record), f)

        test_wt_vi_traj_tuples, test_BEC_lengths, test_BEC_constraints = \
            ps_helpers.obtain_test_environments(wt_vi_traj_candidates, min_subset_constraints_record, env_record, traj_record, weights, BEC_params['n_test_demos'], BEC_params['test_difficulty'], step_cost_flag, summary, BEC_params['summary_type'])

        with open('models/' + data_loc + '/test_environments.pickle', 'wb') as f:
            pickle.dump((test_wt_vi_traj_tuples, test_BEC_lengths, test_BEC_constraints), f)

    if visualize_test_env:
        BEC.visualize_test_envs(test_wt_vi_traj_tuples, test_BEC_lengths, test_BEC_constraints, weights, step_cost_flag)
    return test_wt_vi_traj_tuples, test_BEC_lengths, test_BEC_constraints
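As with the previous examples, a hypothetical call might look like the following; the key names come from the function body, while the identifiers and values are placeholders:

BEC_params = {'n_test_demos': 6,
              'test_difficulty': 'high',   # hypothetical difficulty label
              'summary_type': 'demo'}
test_tuples, test_lengths, test_constraints = obtain_test_environments(
    'augmented_taxi',        # hypothetical MDP class
    'augmented_taxi',        # data_loc: pickles are cached under models/<data_loc>/
    mdp_parameters, weights, BEC_params,
    step_cost_flag=True,
    summary=BEC_summary,     # output of obtain_BEC_summary (Example #2)
    visualize_test_env=True)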