def debug_demonstrations():

    world = create_random_10x10_3feature()

    print("rewards")
    world.print_rewards()

    print("features")
    utils.display_onehot_state_features(world)

    Q = mdp.compute_q_values(world)
    #print("Q-values")
    #print(Q)

    print("optimal policy")
    opt_policy = mdp.find_optimal_policy(world, Q=Q)
    #print(opt_policy)
    print("optimal policy")
    world.print_map(world.to_arrows(opt_policy))

    print("terminals", world.terminals)
    print("demo 1")
    demoA = utils.optimal_rollout_from_Qvals((1, 1), 3, Q, world, 0.0001)
    for (s, a) in demoA:
        print("({},{})".format(s, world.to_arrow(a)))
    print(mdp.calculate_trajectory_feature_counts(demoA, world))

    print()
    print("demo 2")
    demoB = utils.sa_optimal_rollout_from_Qvals((1, 1), (0, 1), 3, Q, world,
                                                0.0001)
    for (s, a) in demoB:
        print("({},{})".format(s, world.to_arrow(a)))
    print(mdp.calculate_trajectory_feature_counts(demoB, world))

    tpair = TrajPair(demoA, demoB, world, 0.0001)
    print(world.weights)
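
#Illustrative aside (not from the original module): calculate_trajectory_feature_counts
#above is assumed to accumulate the (possibly discounted) state features visited along
#a rollout.  A minimal sketch under that assumption (gamma and the features argument,
#a per-state feature lookup, are assumptions rather than the repo's actual API):
import numpy as np

def trajectory_feature_counts_sketch(trajectory, features, gamma=0.95):
    """Sum discounted feature vectors over the (state, action) pairs of a rollout."""
    return sum((gamma ** t) * np.asarray(features[s])
               for t, (s, a) in enumerate(trajectory))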
print("SOLUTION:::::")
for i, true_world in enumerate(mdp_set_cover):
    print()
    V = mdp.value_iteration(true_world, epsilon=precision)
    Qopt = mdp.compute_q_values(true_world, V=V, eps=precision)
    opt_policy = mdp.find_optimal_policy(true_world, Q=Qopt, epsilon=precision)

    print("true weights: ", true_weights)

    print("rewards")
    true_world.print_rewards()
    print("value function")

    true_world.print_map(V)
    print("mdp features")
    utils.display_onehot_state_features(true_world)

    print("optimal policy")
    true_world.print_map(true_world.to_arrows(opt_policy))

    filename = "./data_analysis/figs/twoXtwo/setcover" + str(i) + ".png"
    mdp_plot.plot_optimal_policy_vav(opt_policy,
                                     true_world.features,
                                     filename=filename)
    #plot the AEC for each solution
    mdp_teacher = machine_teaching.MdpFamilyTeacher([true_world], precision,
                                                    debug)
    halfspaces, non_redundant_indices = mdp_teacher.get_halfspaces_for_plotting()
    filename = "./data_analysis/figs/twoXtwo/mdp_aec" + str(i) + ".png"
    plot_aec.plot_feasible_region(halfspaces, non_redundant_indices, filename)
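
#Illustrative aside (not part of the original script): the MdpFamilyTeacher method
#below performs a greedy set cover over halfspace constraints.  The same greedy rule
#on plain index sets, as a minimal self-contained sketch (the function name and its
#arguments are hypothetical):
def greedy_set_cover_sketch(universe, candidates):
    """Greedily pick candidate sets until every element of universe is covered."""
    covered, chosen = set(), []
    while covered != set(universe):
        #pick the candidate that covers the most still-uncovered elements
        best = max(candidates, key=lambda c: len(set(c) - covered))
        if not set(best) - covered:
            break  #no candidate adds coverage; stop rather than loop forever
        chosen.append(best)
        covered |= set(best)
    return chosen
#e.g. greedy_set_cover_sketch({0, 1, 2, 3}, [{0, 1}, {1, 2}, {2, 3}]) -> [{0, 1}, {2, 3}]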
    def get_machine_teaching_mdps(self):
        """Greedily select a subset of MDPs from the family whose induced
        halfspace constraints together cover all of the family's constraints
        (a greedy approximation to minimum set cover)."""
        constraint_set = self.family_halfspaces
        candidate_mdps = self.mdp_family
        candidate_halfspaces = self.mdp_halfspaces
        #boolean bookkeeping to track which constraints in the set are already covered
        covered = [False for _ in constraint_set]

        #for each candidate MDP, count how many uncovered constraints its halfspaces cover and greedily pick the one that adds the most
        total_covered = 0
        opt_mdps = []
        while total_covered < len(constraint_set):
            if self.debug: print("set cover iteration")
            constraints_to_add = None
            best_mdp = None
            max_count = 0
            for i, mdp_env in enumerate(candidate_mdps):
                # if self.debug:
                #     print("-"*20)
                #     print("MDP", i)

                #     V = mdp.value_iteration(mdp_env, epsilon=self.precision)
                #     Qopt = mdp.compute_q_values(mdp_env, V=V, eps=self.precision)
                #     opt_policy = mdp.find_optimal_policy(mdp_env, Q = Qopt, epsilon=self.precision)
                #     print("rewards")
                #     mdp_env.print_rewards()
                #     print("value function")

                #     mdp_env.print_map(V)
                #     print("mdp features")
                #     utils.display_onehot_state_features(mdp_env)

                #     print("optimal policy")
                #     mdp_env.print_map(mdp_env.to_arrows(opt_policy))

                #     print("halfspace")
                #     print(candidate_halfspaces[i])
                #get the halfspaces induced by an optimal policy in this MDP
                constraints_new = candidate_halfspaces[i]

                count = self.count_new_covers(constraints_new, constraint_set,
                                              covered)
                #if self.debug: print("covered", count)
                if count > max_count:
                    max_count = count
                    constraints_to_add = constraints_new
                    best_mdp = mdp_env
                    if self.debug:
                        print()
                        print("best mdp so far")
                        print("-" * 20)
                        print("MDP", i)

                        V = mdp.value_iteration(mdp_env,
                                                epsilon=self.precision)
                        Qopt = mdp.compute_q_values(mdp_env,
                                                    V=V,
                                                    eps=self.precision)
                        opt_policy = mdp.find_optimal_policy(
                            mdp_env, Q=Qopt, epsilon=self.precision)
                        print("rewards")
                        mdp_env.print_rewards()
                        print("value function")

                        mdp_env.print_map(V)
                        print("mdp features")
                        utils.display_onehot_state_features(mdp_env)

                        print("optimal policy")
                        mdp_env.print_map(mdp_env.to_arrows(opt_policy))

                        print("halfspace")
                        print(constraints_to_add)

                        print("covered", count)

            if best_mdp is None:
                #no candidate MDP covers any remaining constraint; fail loudly rather than loop forever
                raise RuntimeError("greedy set cover could not cover all remaining constraints")
            #update covered flags and add best_mdp to the set cover solution
            opt_mdps.append(best_mdp)
            covered = self.update_covered_constraints(constraints_to_add,
                                                      constraint_set, covered)
            total_covered += max_count
            #TODO: optimize by removing an MDP from candidate_mdps once it has been added to opt_mdps

        return opt_mdps
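
#Illustrative aside (not the class's actual implementation): count_new_covers and
#update_covered_constraints are defined elsewhere in MdpFamilyTeacher.  One plausible
#standalone sketch, assuming each halfspace is a numpy normal vector that "covers" a
#constraint when the two agree up to positive scaling (the helper names and the
#np.allclose tolerance are assumptions):
import numpy as np

def matches_up_to_scale_sketch(h1, h2, tol=1e-6):
    """Hypothetical test: do two halfspace normals agree after normalization?"""
    return np.allclose(h1 / np.linalg.norm(h1), h2 / np.linalg.norm(h2), atol=tol)

def count_new_covers_sketch(constraints_new, constraint_set, covered):
    """Count constraints that are not yet covered but are matched by a new halfspace."""
    return sum(1 for j, c in enumerate(constraint_set)
               if not covered[j]
               and any(matches_up_to_scale_sketch(h, c) for h in constraints_new))

def update_covered_constraints_sketch(constraints_to_add, constraint_set, covered):
    """Return an updated covered list with newly matched constraints marked True."""
    return [covered[j] or any(matches_up_to_scale_sketch(h, c) for h in constraints_to_add)
            for j, c in enumerate(constraint_set)]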