Example #1
    def resolve_pomdp(self):
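        # re-solve the POMDP starting from the current belief, rebuilding the
        # transition matrix with the (possibly updated) action multipliers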
        start = self.current_belief

        action_prob_knowledge_gain_mult = self.action_prob_knowledge_gain_mult

        self.T = generate_transition_matrix(
            num_knowledge_levels=self.num_knowledge_levels,
            num_engagement_levels=self.num_engagement_levels,
            num_attempts=self.num_attempts,
            prob_knowledge_gain=self.prob_knowledge_gain,
            prob_engagement_gain=self.prob_engagement_gain,
            prob_engagement_loss=self.prob_engagement_loss,
            action_prob_knowledge_gain_mult=self.action_prob_knowledge_gain_mult,
            action_prob_engagement_gain_mult=self.action_prob_engagement_gain_mult,
            prob_correct_answer=self.prob_correct_answer,
            prob_correct_answer_after_1_attempt=self.prob_correct_answer_after_1_attempt,
            prob_drop_for_low_engagement=self.prob_drop_for_low_engagement)

        simple_pomdp = POMDP(self.T,
                             self.O,
                             self.R,
                             np.array(start),
                             self.discount,
                             states=self.all_states,
                             actions=self.actions,
                             observations=self.all_obs,
                             values='reward')

        self.simple_pomdp_graph_policy = simple_pomdp.solve(method='grid',
                                                            verbose=False,
                                                            n_iterations=500)

        self.simple_pomdp_graph_policy_belief_runner = GraphPolicyBeliefRunner(
            self.simple_pomdp_graph_policy, simple_pomdp)

        self.current_belief = self.simple_pomdp_graph_policy_belief_runner.current_belief  #should be the same as start?
        print "current belief is: "
        print self.current_belief
        self.action = self.simple_pomdp_graph_policy_belief_runner.get_action()  # choose the first action
def test_command_line_sequence(param_file):
    #read in params
    with open(param_file) as data_file:
        params = json.load(data_file)

    # discount factor
    discount = params["discount"]

    # state variables
    knowledge_states = params["knowledge_states"]
    engagement_states = params["engagement_states"]
    attempt_states = params["attempt_states"]
    num_knowledge_levels = len(knowledge_states)
    num_engagement_levels = len(engagement_states)
    num_attempts = len(attempt_states)
    all_states = combine_states_to_one_list(knowledge_states,
                                            engagement_states, attempt_states)
    num_states = len(all_states)

    # starting distribution
    start = np.zeros(num_states)
    num_start_states = num_knowledge_levels * num_engagement_levels
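    # all_states is assumed to be ordered with the attempt level varying fastest,
    # so every index divisible by num_attempts is an attempt-0 state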
    for i in range(num_states):
        if i % num_attempts == 0:
            start[i] = 1.0 / float(num_start_states)
        else:
            start[i] = 0.0

    # probabilities associated with the transition matrix
    prob_knowledge_gain = params["prob_knowledge_gain"]
    prob_engagement_gain = params["prob_engagement_gain"]
    prob_engagement_loss = params["prob_engagement_loss"]
    prob_correct_answer = params["prob_correct_answer"]
    prob_correct_answer_after_1_attempt = params[
        "prob_correct_answer_after_1_attempt"]
    prob_drop_for_low_engagement = params["prob_drop_for_low_engagement"]

    # actions
    actions = params["actions"]
    num_actions = len(actions)

    # action-related reward variables
    action_rewards = params["action_rewards"]
    engagement_reward = params["engagement_reward"]
    knowledge_reward = params["knowledge_reward"]
    end_state_remain_reward = params["end_state_remain_reward"]
    reward_for_first_attempt_actions = params[
        "reward_for_first_attempt_actions"]
    action_prob_knowledge_gain_mult = params["action_prob_knowledge_gain_mult"]
    action_prob_engagement_gain_mult = params[
        "action_prob_engagement_gain_mult"]

    # observations
    correctness_obs = params["correctness_obs"]
    speed_obs = params["speed_obs"]
    all_obs = combine_obs_types_to_one_list(correctness_obs, speed_obs)
    num_observations = len(all_obs)

    # observation related variables
    prob_speeds_for_low_engagement = params["prob_speeds_for_low_engagement"]
    prob_speeds_for_high_engagement = params["prob_speeds_for_high_engagement"]
    action_speed_multipliers = np.array(params["action_speed_multipliers"])

    R = generate_reward_matrix(
        actions=actions,
        action_rewards=action_rewards,
        engagement_reward=engagement_reward,
        knowledge_reward=knowledge_reward,
        end_state_remain_reward=end_state_remain_reward,
        num_knowledge_levels=num_knowledge_levels,
        num_engagement_levels=num_engagement_levels,
        num_attempts=num_attempts,
        num_observations=num_observations,
        reward_for_first_attempt_actions=reward_for_first_attempt_actions)

    T = generate_transition_matrix(
        num_knowledge_levels=num_knowledge_levels,
        num_engagement_levels=num_engagement_levels,
        num_attempts=num_attempts,
        prob_knowledge_gain=prob_knowledge_gain,
        prob_engagement_gain=prob_engagement_gain,
        prob_engagement_loss=prob_engagement_loss,
        action_prob_knowledge_gain_mult=action_prob_knowledge_gain_mult,
        action_prob_engagement_gain_mult=action_prob_engagement_gain_mult,
        prob_correct_answer=prob_correct_answer,
        prob_correct_answer_after_1_attempt=prob_correct_answer_after_1_attempt,
        prob_drop_for_low_engagement=prob_drop_for_low_engagement)

    O = generate_observation_matrix(
        knowledge_states=knowledge_states,
        engagement_states=engagement_states,
        attempt_states=attempt_states,
        correctness_obs=correctness_obs,
        speed_obs=speed_obs,
        num_actions=num_actions,
        prob_speeds_for_low_engagement=prob_speeds_for_low_engagement,
        prob_speeds_for_high_engagement=prob_speeds_for_high_engagement,
        action_speed_multipliers=action_speed_multipliers)

    #create POMDP model
    simple_pomdp = POMDP(T,
                         O,
                         R,
                         np.array(start),
                         discount,
                         states=all_states,
                         actions=actions,
                         observations=all_obs,
                         values='reward')

    simple_pomdp_graph_policy = simple_pomdp.solve(method='grid',
                                                   verbose=False,
                                                   n_iterations=500)

    simple_pomdp_graph_policy_belief_runner = GraphPolicyBeliefRunner(
        simple_pomdp_graph_policy, simple_pomdp)

    num_states_per_knowledge_level = num_engagement_levels * num_attempts
    problem_num = 1
    attempt_num = 1
    receiving_obs = True
    while receiving_obs is True:
        obs = input("Enter observation: ")
        if obs == "done":
            receiving_obs = False
            break
        if obs not in all_obs:
            print("Invalid observation provided\n")
            continue
        knowledge_level_index = 0
        action = simple_pomdp_graph_policy_belief_runner.get_action()
        current_belief = simple_pomdp_graph_policy_belief_runner.step(
            obs, action)
        print("\nProblem %i, Attempt %i: (%s, %s)" %
              (problem_num, attempt_num, action, obs))

        belief_str = ""
        sum_across_states = 0.0
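        # sum the belief over attempt states so each printed entry is the total
        # probability mass of one (knowledge, engagement) pair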
        for k in range(num_states):
            sum_across_states += current_belief[k]
            if k % num_attempts == num_attempts - 1:
                belief_str += "%s: %.3f\t\t" % (all_states[k][:-3],
                                                sum_across_states)
                knowledge_level_index += 1
                sum_across_states = 0.0
            if k % num_states_per_knowledge_level == num_states_per_knowledge_level - 1:
                belief_str += "\n"

        print(belief_str)
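        # move on to the next problem after a correct answer or after the third attempt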

        if "R" in obs or attempt_num == 3:
            problem_num += 1
            attempt_num = 1
        else:
            attempt_num += 1
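

# A minimal entry point, assuming this function lives in a standalone script that is
# run directly with the path to a JSON parameter file (the script name below is
# hypothetical):
#
#   python test_sequence.py data/params.json
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        print("usage: python test_sequence.py <param_file.json>")
        sys.exit(1)
    test_command_line_sequence(sys.argv[1])
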
Example #3
R_END = 0.1
LOOP = False
PLOT = False

# Convert the task into a POMDP

h2p = HTMToPOMDP(T_WAIT, T_ASK, T_TELL, C_INTR, end_reward=R_END, loop=LOOP)
p = h2p.task_to_pomdp(stool_task_sequential)

gp = p.solve(method='grid', n_iterations=500, verbose=True)
gp.save_as_json(
    os.path.join(os.path.dirname(__file__),
                 '../visualization/policy/json/icra.json'))

pol = GraphPolicyBeliefRunner(gp, p)
pol.save_trajectories_from_starts(os.path.join(
    os.path.dirname(__file__),
    '../visualization/trajectories/json/trajectories.json'),
                                  horizon=10,
                                  indent=2)
gp2 = pol.visit()
gp2.save_as_json(
    os.path.join(os.path.dirname(__file__),
                 '../visualization/policy/json/from_beliefs.json'))


def plot_values(values, actions, p):
    b = values - values.min()
    b /= b.max(-1)[:, None]
    plot = plot_beliefs(
R_END = 0.1
LOOP = False
PLOT = False

# Convert the task into a POMDP

h2p = HTMToPOMDP(T_WAIT, T_ASK, T_TELL, C_INTR, end_reward=R_END, loop=LOOP)
p = h2p.task_to_pomdp(stool_task_sequential)

gp = p.solve(method='grid', n_iterations=500, verbose=True)
gp.save_as_json(os.path.join(os.path.dirname(__file__),
                             '../visualization/policy/json/icra.json'))


pol = GraphPolicyBeliefRunner(gp, p)
pol.save_trajectories_from_starts(
    os.path.join(
        os.path.dirname(__file__),
        '../visualization/trajectories/json/trajectories.json'),
    horizon=10, indent=2)
gp2 = pol.visit()
gp2.save_as_json(os.path.join(
    os.path.dirname(__file__),
    '../visualization/policy/json/from_beliefs.json'))


def plot_values(values, actions, p):
    b = values - values.min()
    b /= b.max(-1)[:, None]
    plot = plot_beliefs(b, states=p.states, xlabels_rotation=45,
Example #5
def test_command_line_sequence(self, param_file):
    #read in params
    #pdb.set_trace()

    # discount factor
    discount = rospy.get_param('discount')
    print(discount)

    # state variables
    knowledge_states = rospy.get_param('knowledge_states')
    engagement_states = rospy.get_param('engagement_states')
    attempt_states = rospy.get_param('attempt_states')
    num_knowledge_levels = len(knowledge_states)
    num_engagement_levels = len(engagement_states)
    num_attempts = len(attempt_states)
    all_states = combine_states_to_one_list(knowledge_states,
                                            engagement_states, attempt_states)
    num_states = len(all_states)

    # starting distribution
    start = np.zeros(num_states)
    num_start_states = num_knowledge_levels * num_engagement_levels
    for i in range(num_states):
        if i % num_attempts == 0:
            start[i] = 1.0 / float(num_start_states)
        else:
            start[i] = 0.0

    # probabilities associated with the transition matrix
    prob_knowledge_gain = rospy.get_param('prob_knowledge_gain')
    prob_engagement_gain = rospy.get_param('prob_engagement_gain')
    prob_engagement_loss = rospy.get_param('prob_engagement_loss')
    prob_correct_answer = rospy.get_param('prob_correct_answer')
    prob_correct_answer_after_1_attempt = rospy.get_param(
        'prob_correct_answer_after_1_attempt')
    prob_drop_for_low_engagement = rospy.get_param(
        'prob_drop_for_low_engagement')

    # actions
    actions = rospy.get_param('actions')
    num_actions = len(actions)

    # action-related reward variables
    action_rewards = rospy.get_param('action_rewards')
    engagement_reward = rospy.get_param('engagement_reward')
    knowledge_reward = rospy.get_param('knowledge_reward')
    end_state_remain_reward = rospy.get_param('end_state_remain_reward')
    reward_for_first_attempt_actions = rospy.get_param(
        'reward_for_first_attempt_actions')
    action_prob_knowledge_gain_mult = rospy.get_param(
        'action_prob_knowledge_gain_mult')
    action_prob_engagement_gain_mult = rospy.get_param(
        'action_prob_engagement_gain_mult')

    # observations
    correctness_obs = rospy.get_param('correctness_obs')
    speed_obs = rospy.get_param('speed_obs')
    all_obs = combine_obs_types_to_one_list(correctness_obs, speed_obs)
    num_observations = len(all_obs)

    # observation related variables
    prob_speeds_for_low_engagement = rospy.get_param(
        'prob_speeds_for_low_engagement')
    prob_speeds_for_high_engagement = rospy.get_param(
        'prob_speeds_for_high_engagement')
    action_speed_multipliers = np.array(
        rospy.get_param('action_speed_multipliers'))

    R = generate_reward_matrix(
        actions=actions,
        action_rewards=action_rewards,
        engagement_reward=engagement_reward,
        knowledge_reward=knowledge_reward,
        end_state_remain_reward=end_state_remain_reward,
        num_knowledge_levels=num_knowledge_levels,
        num_engagement_levels=num_engagement_levels,
        num_attempts=num_attempts,
        num_observations=num_observations,
        reward_for_first_attempt_actions=reward_for_first_attempt_actions)

    T = generate_transition_matrix(
        num_knowledge_levels=num_knowledge_levels,
        num_engagement_levels=num_engagement_levels,
        num_attempts=num_attempts,
        prob_knowledge_gain=prob_knowledge_gain,
        prob_engagement_gain=prob_engagement_gain,
        prob_engagement_loss=prob_engagement_loss,
        action_prob_knowledge_gain_mult=action_prob_knowledge_gain_mult,
        action_prob_engagement_gain_mult=action_prob_engagement_gain_mult,
        prob_correct_answer=prob_correct_answer,
        prob_correct_answer_after_1_attempt=prob_correct_answer_after_1_attempt,
        prob_drop_for_low_engagement=prob_drop_for_low_engagement)

    O = generate_observation_matrix(
        knowledge_states=knowledge_states,
        engagement_states=engagement_states,
        attempt_states=attempt_states,
        correctness_obs=correctness_obs,
        speed_obs=speed_obs,
        num_actions=num_actions,
        prob_speeds_for_low_engagement=prob_speeds_for_low_engagement,
        prob_speeds_for_high_engagement=prob_speeds_for_high_engagement,
        action_speed_multipliers=action_speed_multipliers)

    #create POMDP model
    simple_pomdp = POMDP(T,
                         O,
                         R,
                         np.array(start),
                         discount,
                         states=all_states,
                         actions=actions,
                         observations=all_obs,
                         values='reward')

    simple_pomdp_graph_policy = simple_pomdp.solve(method='grid',
                                                   verbose=False,
                                                   n_iterations=500)

    simple_pomdp_graph_policy_belief_runner = GraphPolicyBeliefRunner(
        simple_pomdp_graph_policy, simple_pomdp)

    num_states_per_knowledge_level = num_engagement_levels * num_attempts
    problem_num = 1
    attempt_num = 1
    #receiving_obs = True
    observations = ['R-fast', 'R-med', 'R-slow', 'W-fast', 'W-med', 'W-slow']
    for _ in observations:
        #while receiving_obs is True:
        obs = random.choice(observations)  # simulate user input with a random observation
        #raw_input("Enter observation: ")
        if obs == "done":
            receiving_obs = False
            break
        if obs not in all_obs:
            print "Invalid observation provided\n"
            continue
        knowledge_level_index = 0
        action = simple_pomdp_graph_policy_belief_runner.get_action()
        current_belief = simple_pomdp_graph_policy_belief_runner.step(obs)
        print "\nProblem %i, Attempt %i: (%s, %s)" % (problem_num, attempt_num,
                                                      action, obs)

        belief_str = ""
        sum_across_states = 0.0
        for k in range(num_states):
            sum_across_states += current_belief[k]
            if k % num_attempts == num_attempts - 1:
                belief_str += "%s: %.3f\t\t" % (all_states[k][:-3],
                                                sum_across_states)
                knowledge_level_index += 1
                sum_across_states = 0.0
            if k % num_states_per_knowledge_level == num_states_per_knowledge_level - 1:
                belief_str += "\n"

        print belief_str
        self.pomdp_action = action
        # list out keys and values separately
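        # pomdp_dic is assumed to be defined elsewhere (e.g. a module-level dict
        # mapping POMDP action names to tutoring behaviors)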
        self.key_list = list(pomdp_dic.keys())
        self.val_list = list(pomdp_dic.values())
        print(self.val_list[self.key_list.index(self.pomdp_action)])

        if "R" in obs or attempt_num == 3:
            problem_num += 1
            attempt_num = 1
        else:
            attempt_num += 1
Example #6
class TutoringModel:
    def __init__(self):
        self.total_num_questions = 0
        self.current_question = 0
        self.level = 1

        self.pid = -1
        self.sessionNum = -1
        self.expGroup = -1  #0 is control (fixed policy), 1 is our experimental condition (pomdp policy)
        self.difficultyGroup = -1
        self.logFile = None

        self.tries = 0

        self.attempt_times = []
        #self.total_num_help_actions = 0
        self.fixed_help_index = 0

        self.initial_knowledge_state = ""

        self.loadSession = 0
        self.inSession = False

        #placeholder to hold pomdp model variables: current action, belief runner, etc --> initialize during START msg

        rospy.init_node('dummy_model', anonymous=True)
        self.decisons_pub = rospy.Publisher('model_decision_msg',
                                            ControlMsg,
                                            queue_size=10)
        self.rate = rospy.Rate(10)  # 10hz

        rospy.Subscriber("tablet_msg", TabletMsg, self.tablet_msg_callback
                         )  # subscribe to messages from the tablet node
        rospy.Subscriber(
            "robot_speech_msg", String,
            self.robot_msg_callback)  # subscribe to messages from the robot

        # import all question jsons. Note: the tablet app should have access to the same files
        level_one_questions = []
        with open(
                rospack.get_path('nao_tutoring_behaviors') +
                "/scripts/data/level1.json", 'r') as question_file_1:
            level_one_questions = json.load(question_file_1)
        level_two_questions = []
        with open(
                rospack.get_path('nao_tutoring_behaviors') +
                "/scripts/data/level2.json", 'r') as question_file_2:
            level_two_questions = json.load(question_file_2)
        level_three_questions = []
        with open(
                rospack.get_path('nao_tutoring_behaviors') +
                "/scripts/data/level3.json", 'r') as question_file_3:
            level_three_questions = json.load(question_file_3)
        with open(
                rospack.get_path('nao_tutoring_behaviors') +
                "/scripts/data/level4.json", 'r') as question_file_4:
            level_four_questions = json.load(question_file_4)
        with open(
                rospack.get_path('nao_tutoring_behaviors') +
                "/scripts/data/level5.json", 'r') as question_file_5:
            level_five_questions = json.load(question_file_5)

        self.questions = [[], level_one_questions, level_two_questions,
                          level_three_questions]
        self.harder_questions = [[], [], [], level_three_questions,
                                 level_four_questions, level_five_questions]

    def setup_pomdp(self):
        # the param file that "works" for the base model
        param_file = (rospack.get_path('nao_tutoring_behaviors') +
                      "/scripts/data/03_13_B.json")
        with open(param_file) as data_file:
            params = json.load(data_file)

        # discount factor
        self.discount = params["discount"]

        # state variables
        knowledge_states = params["knowledge_states"]
        engagement_states = params["engagement_states"]
        attempt_states = params["attempt_states"]
        self.num_knowledge_levels = len(knowledge_states)
        self.num_engagement_levels = len(engagement_states)
        self.num_attempts = len(attempt_states)
        self.all_states = combine_states_to_one_list(knowledge_states,
                                                     engagement_states,
                                                     attempt_states)
        self.num_states = len(self.all_states)

        # starting distribution
        start = np.zeros(self.num_states)
        if self.sessionNum == 1 and self.loadSession == 0:
            initial_state = int(self.initial_knowledge_state[1:])
            majority_start = 0.7
            minority_start = (1.0 -
                              majority_start) / (self.num_knowledge_levels - 1)
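            # the index 4 + i * 8 appears to assume 8 states per knowledge level
            # (e.g. 2 engagement levels x 4 attempt states) and picks one attempt-0
            # state in each knowledge block for the start distribution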
            for i in range(self.num_knowledge_levels):
                #start[4 + i * 8] = 1.0 / float(self.num_knowledge_levels) #uniform start state
                if i == initial_state:
                    start[4 + i * 8] = majority_start
                else:
                    start[4 + i * 8] = minority_start

            self.action_prob_knowledge_gain_mult = params[
                "action_prob_knowledge_gain_mult"]
            self.action_prob_engagement_gain_mult = params[
                "action_prob_engagement_gain_mult"]

        else:
            start = self.current_belief
            # these multipliers should already have been set earlier (e.g. when loading
            # a saved session), so the assignments below are effectively no-ops
            self.action_prob_knowledge_gain_mult = self.action_prob_knowledge_gain_mult
            self.action_prob_engagement_gain_mult = self.action_prob_engagement_gain_mult

        # probabilities associated with the transition matrix
        self.prob_knowledge_gain = params["prob_knowledge_gain"]
        self.prob_engagement_gain = params["prob_engagement_gain"]
        self.prob_engagement_loss = params["prob_engagement_loss"]
        self.prob_correct_answer = params["prob_correct_answer"]
        self.prob_correct_answer_after_1_attempt = params[
            "prob_correct_answer_after_1_attempt"]
        self.prob_drop_for_low_engagement = params[
            "prob_drop_for_low_engagement"]

        # actions
        self.actions = params["actions"]
        self.num_actions = len(self.actions)

        # action-related reward variables
        action_rewards = params["action_rewards"]
        engagement_reward = params["engagement_reward"]
        knowledge_reward = params["knowledge_reward"]
        end_state_remain_reward = params["end_state_remain_reward"]
        reward_for_first_attempt_actions = params[
            "reward_for_first_attempt_actions"]
        #action_prob_knowledge_gain_mult = params["action_prob_knowledge_gain_mult"]
        #self.action_prob_engagement_gain_mult = params["action_prob_engagement_gain_mult"]

        # observations
        correctness_obs = params["correctness_obs"]
        speed_obs = params["speed_obs"]
        self.all_obs = combine_obs_types_to_one_list(correctness_obs,
                                                     speed_obs)
        self.num_observations = len(self.all_obs)

        # observation related variables
        self.prob_speeds_for_low_engagement = params[
            "prob_speeds_for_low_engagement"]
        self.prob_speeds_for_high_engagement = params[
            "prob_speeds_for_high_engagement"]
        action_speed_multipliers = np.array(params["action_speed_multipliers"])

        self.R = generate_reward_matrix(
            actions=self.actions,
            action_rewards=action_rewards,
            engagement_reward=engagement_reward,
            knowledge_reward=knowledge_reward,
            end_state_remain_reward=end_state_remain_reward,
            num_knowledge_levels=self.num_knowledge_levels,
            num_engagement_levels=self.num_engagement_levels,
            num_attempts=self.num_attempts,
            num_observations=self.num_observations,
            reward_for_first_attempt_actions=reward_for_first_attempt_actions)

        self.T = generate_transition_matrix(
            num_knowledge_levels=self.num_knowledge_levels,
            num_engagement_levels=self.num_engagement_levels,
            num_attempts=self.num_attempts,
            prob_knowledge_gain=self.prob_knowledge_gain,
            prob_engagement_gain=self.prob_engagement_gain,
            prob_engagement_loss=self.prob_engagement_loss,
            action_prob_knowledge_gain_mult=self.action_prob_knowledge_gain_mult,
            action_prob_engagement_gain_mult=self.action_prob_engagement_gain_mult,
            prob_correct_answer=self.prob_correct_answer,
            prob_correct_answer_after_1_attempt=self.prob_correct_answer_after_1_attempt,
            prob_drop_for_low_engagement=self.prob_drop_for_low_engagement)

        self.O = generate_observation_matrix(
            knowledge_states=knowledge_states,
            engagement_states=engagement_states,
            attempt_states=attempt_states,
            correctness_obs=correctness_obs,
            speed_obs=speed_obs,
            num_actions=self.num_actions,
            prob_speeds_for_low_engagement=self.prob_speeds_for_low_engagement,
            prob_speeds_for_high_engagement=self.prob_speeds_for_high_engagement,
            action_speed_multipliers=action_speed_multipliers)

        #create POMDP model
        simple_pomdp = POMDP(self.T,
                             self.O,
                             self.R,
                             np.array(start),
                             self.discount,
                             states=self.all_states,
                             actions=self.actions,
                             observations=self.all_obs,
                             values='reward')

        self.simple_pomdp_graph_policy = simple_pomdp.solve(method='grid',
                                                            verbose=False,
                                                            n_iterations=500)

        self.simple_pomdp_graph_policy_belief_runner = GraphPolicyBeliefRunner(
            self.simple_pomdp_graph_policy, simple_pomdp)

        self.current_belief = self.simple_pomdp_graph_policy_belief_runner.current_belief  #should be the same as start?
        print "current belief is: "
        print self.current_belief
        self.action = self.simple_pomdp_graph_policy_belief_runner.get_action()  # choose the first action

    def resolve_pomdp(self):
        start = self.current_belief

        action_prob_knowledge_gain_mult = self.action_prob_knowledge_gain_mult

        self.T = generate_transition_matrix(
            num_knowledge_levels=self.num_knowledge_levels,
            num_engagement_levels=self.num_engagement_levels,
            num_attempts=self.num_attempts,
            prob_knowledge_gain=self.prob_knowledge_gain,
            prob_engagement_gain=self.prob_engagement_gain,
            prob_engagement_loss=self.prob_engagement_loss,
            action_prob_knowledge_gain_mult=self.action_prob_knowledge_gain_mult,
            action_prob_engagement_gain_mult=self.action_prob_engagement_gain_mult,
            prob_correct_answer=self.prob_correct_answer,
            prob_correct_answer_after_1_attempt=self.prob_correct_answer_after_1_attempt,
            prob_drop_for_low_engagement=self.prob_drop_for_low_engagement)

        simple_pomdp = POMDP(self.T,
                             self.O,
                             self.R,
                             np.array(start),
                             self.discount,
                             states=self.all_states,
                             actions=self.actions,
                             observations=self.all_obs,
                             values='reward')

        self.simple_pomdp_graph_policy = simple_pomdp.solve(method='grid',
                                                            verbose=False,
                                                            n_iterations=500)

        self.simple_pomdp_graph_policy_belief_runner = GraphPolicyBeliefRunner(
            self.simple_pomdp_graph_policy, simple_pomdp)

        self.current_belief = self.simple_pomdp_graph_policy_belief_runner.current_belief  #should be the same as start?
        print "current belief is: "
        print self.current_belief
        self.action = self.simple_pomdp_graph_policy_belief_runner.get_action()  # choose the first action

    def get_new_multipliers(self, obs, action):
        print "returning existing multipliers that are not changing for now"
        return self.action_prob_knowledge_gain_mult, self.action_prob_engagement_gain_mult

    def repeat_question(self):  # send this message to have the student try the same question again
        control_message = ControlMsg()  # with no tutoring behavior
        control_message.nextStep = "QUESTION-REPEAT"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = ""  #Try that again." #we don't want to say anything here.

        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def set_up_session(self):
        control_message = ControlMsg()
        control_message.nextStep = "SETUP-SESSION"
        control_message.otherInfo = str(self.expGroup)

        #self.decisons_pub.publish(control_message)
        #print "sent: ", control_message

    def first_question(self):
        self.total_num_questions += 1
        self.tries = 0
        control_message = ControlMsg()
        control_message.nextStep = "QUESTION-FIRST"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = self.questions[self.level][
            self.current_question]['Spoken Question']

        print self.level, self.current_question, self.questions[self.level][
            self.current_question]

        #time.sleep(3) #wait a little before sending first question message so robot can finish intro
        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def next_question(self):  # indicates that the student should move on to the next question
        #self.total_num_questions += 1                           # keeps track of the total number of questions student has seen
        self.tries = 0  # reset the number of attempts on the question
        self.level = self.total_num_questions % 3
        if self.difficultyGroup == 0:
            self.level += 1
        else:
            self.level += 3

        if self.total_num_questions % 3 == 0:
            self.current_question += 1

        self.total_num_questions += 1

        if (self.current_question >= len(self.questions[self.level])):
            print "this should only happen if student has really finished all questions"
            #return self.question_next_level()

        control_message = ControlMsg()
        control_message.nextStep = "QUESTION-NEXT"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        if (self.current_question < len(self.questions[self.level])):
            control_message.robotSpeech = self.questions[self.level][
                self.current_question][
                    'Spoken Question']  # we give the text for this question  to the robot here
        else:
            control_message.robotSpeech = ""  #no speech in case they finish all questions
        #print self.level, self.current_question, self.questions[self.level][self.current_question]

        time.sleep(3)  # wait a little before sending the next question message so the robot can say correct/incorrect
        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def question_next_level(self):  # aditi - no longer used; indicates that the student should move to the next level
        self.tries = 0  # so we increase the level number and go to question 1
        self.current_question = 1
        self.level += 1

        control_message = ControlMsg()
        control_message.nextStep = "QUESTION-LEVEL"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = self.questions[self.level][
            self.current_question][
                'Spoken Question']  # give the text for the question to the robot

        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def send_next_question(self):  # THIS IS NOT USED
        control_message = ControlMsg()
        control_message.nextStep = "QUESTION-NEXT"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = self.questions[self.level][
            self.current_question]['Spoken Question']

    def tic_tac_toe_break(self):  # trigger a game of tic tac toe for a break
        control_message = ControlMsg()  # the robot message here is the speech for the beginning of the game
        control_message.nextStep = "TICTACTOE"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = "Lets take a break and play a game of tic-tac-toe. You will be exes, and I will be ohs. You can go first. Click any square on the board."

        question_id = self.questions[self.level][
            self.current_question]['QuestionID']
        self.log_transaction("TICTACTOE-START", question_id, self.level)
        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def give_hint(self):
        control_message = ControlMsg()
        control_message.nextStep = "SHOWHINT"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = ""

        question_id = self.questions[self.level][
            self.current_question]['QuestionID']
        self.log_transaction("HINT", question_id, self.level)
        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def give_structure_hint(self):  # give a hint in the form of the structure of the problem
        control_message = ControlMsg()
        control_message.nextStep = "SHOWSTRUCTURE"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = "Let's look at how to structure a solution"

        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def give_think_aloud(self):  # ask the student to think aloud
        control_message = ControlMsg()
        control_message.nextStep = "THINKALOUD"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = ""  #"What is the first thing you want to do to solve this problem?"

        question_id = self.questions[self.level][
            self.current_question]['QuestionID']
        self.log_transaction("THINKALOUD", question_id, self.level)
        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def give_example(self):  # give a worked example - which example is given is determined by node_tablet code
        control_message = ControlMsg()  # and is based on the level; node_tablet controls all tablet and robot actions during this
        control_message.nextStep = "SHOWEXAMPLE"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = ""

        question_id = self.questions[self.level][
            self.current_question]['QuestionID']
        self.log_transaction("WORKED-EXAMPLE", question_id, self.level)
        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def give_tutorial(self):  # give an interactive tutorial; like the worked example, this is controlled by node_tablet
        control_message = ControlMsg()
        control_message.nextStep = "SHOWTUTORIAL"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = ""

        question_id = self.questions[self.level][
            self.current_question]['QuestionID']
        self.log_transaction("INTERACTIVE-TUTORIAL", question_id, self.level)
        self.decisons_pub.publish(control_message)
        print "sent:", control_message

    def no_action(self):
        control_message = ControlMsg()
        control_message.nextStep = "NOACTION"
        control_message.questionNum = self.current_question
        control_message.questionLevel = self.level
        control_message.robotSpeech = ""

        question_id = self.questions[self.level][
            self.current_question]['QuestionID']
        self.log_transaction("NO-ACTION", question_id, self.level)
        self.decisons_pub.publish(control_message)
        print "sent: ", control_message

    def get_mean_and_std_time(self):
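        # compute a trimmed mean/std of the attempt times: with more than 10 samples,
        # drop roughly the fastest and slowest 10%; otherwise drop one value from
        # each end of the (sorted) list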
        num_attempts_in_list = len(self.attempt_times)
        if num_attempts_in_list > 10:
            cutoff = int(.1 * num_attempts_in_list)
            print "cutoff is: " + str(cutoff)
        else:
            cutoff = 1

        front_index = cutoff
        back_index = num_attempts_in_list - cutoff
        mean = np.mean(self.attempt_times[front_index:back_index])
        std = np.std(self.attempt_times[front_index:back_index])
        print "mean is: " + str(mean)
        print "std is: " + str(std)
        return mean, std

    def form_observation(self, msgType, timing):
        obs = ""
        if msgType == 'CA':
            obs += "R-"
        elif msgType == 'IA':
            obs += "W-"
        else:
            print "should not be here"
            return ""

        timing = float(timing) / 1000.0
        print "timing in seconds is: " + str(timing)

        if len(self.attempt_times) < 10:  # changed to 10 so that we get 10 data points before calculating fast or slow
            obs += "med"

        else:
            mean, std = self.get_mean_and_std_time()

            zscore = float((timing - mean)) / float(std)
            print "zscore is: " + str(zscore)

            if zscore > 2.0:
                obs += "slow"
            elif zscore < -1.0:
                obs += "fast"
            else:
                obs += "med"

        return obs

    def add_attempt_time(self, timing):
        print "use this method to add timing to list of times"
        timing = float(timing) / 1000.0  # convert time in milliseconds to seconds
        if len(self.attempt_times) < 5:
            self.attempt_times.append(timing)
            self.attempt_times = sorted(self.attempt_times)
        else:
            bisect.insort(self.attempt_times, timing)
        print self.attempt_times

    def log_transaction(self, msgType, questionID, otherInfo):
        transaction = str(self.pid) + "," + str(self.expGroup) + "," + str(
            self.sessionNum) + ","
        #transaction += str(datetime.datetime.now()) + ","
        transaction += str(int(round(time.time() * 1000))) + ","
        transaction += str(questionID) + ","
        transaction += msgType + ","
        transaction += str(
            otherInfo)  # put the level here for msgType==QUESTION
        self.logFile.write(transaction + "\n")
        self.logFile.flush()

    def log_multipliers(self, msgType, questionID, multipliers):
        transaction = str(self.pid) + "," + str(self.expGroup) + "," + str(
            self.sessionNum) + ","
        transaction += str(int(round(time.time() * 1000))) + ","
        transaction += str(questionID) + ","
        transaction += msgType + ","
        transaction += str(multipliers)
        self.logFile.write(transaction + "\n")
        self.logFile.flush()

    def tablet_msg_callback(self, data):  # respond to tablet messages by triggering the next behavior
        rospy.loginfo(rospy.get_caller_id() + " From Tablet, I heard %s ", data)
        # the code here is just based off the question number, but the real model can
        # similarly respond to whether or not the answer was correct and call one of
        # these functions to produce a behavior

        #first check if it was a correct or incorrect answer --> do the same thing for both
        #with any attempt, we need to get the obs then update the model. then we check msgType
        #again and go to next question if correct and provide help-action from model if incorrect
        if (data.msgType == 'CA' or data.msgType == 'IA'):
            question_id = self.questions[self.level][
                self.current_question]['QuestionID']
            attempt = data.otherInfo.split("-")[0]
            timing = int(data.otherInfo.split("-")[1])
            take_break = data.otherInfo.split("-")[2]
            observation = self.form_observation(data.msgType, timing)
            self.log_transaction("OBSERVATION", question_id, observation)
            print "observation is: " + str(observation)
            #placeholder to update belief using action that was just given and this observation
            self.add_attempt_time(timing)
            #placeholder to potentially sleep here if we want model to wait a few seconds before giving help

            if self.expGroup == 1:
                self.current_belief = self.simple_pomdp_graph_policy_belief_runner.step(
                    observation)
                self.current_belief = self.simple_pomdp_graph_policy_belief_runner.current_belief
                print "current belief is: "
                print self.current_belief
                #before we get the next action, lets change our action multipliers and re-solve the pomdp
                self.action = self.simple_pomdp_graph_policy_belief_runner.get_action()
                #self.action_prob_knowledge_gain_mult, self.action_prob_engagement_gain_mult = self.get_new_multipliers(observation, self.action)
                #self.log_multipliers("KNOWLEDGE-MULT", question_id, self.action_prob_knowledge_gain_mult)
                #self.log_multipliers("ENGAGEMENT-MULT", question_id, self.action_prob_engagement_gain_mult)

        if (data.msgType == 'CA'):  # respond to correct answer
            self.fixed_help_index = 0
            attempt = data.otherInfo.split("-")[0]
            timing = int(data.otherInfo.split("-")[1])
            self.log_transaction("CORRECT", question_id,
                                 str(attempt) + "-" + str(timing))
            time.sleep(2)
            self.next_question()
            #self.resolve_pomdp()
            #self.action = self.simple_pomdp_graph_policy_belief_runner.get_action()

        elif (data.msgType == 'IA'):  # respond to incorrect answer
            attempt = data.otherInfo.split("-")[0]
            timing = int(data.otherInfo.split("-")[1])
            take_break = data.otherInfo.split("-")[2]
            self.log_transaction("INCORRECT", question_id,
                                 str(attempt) + "-" + str(timing))
            self.tries += 1

            #placeholder to get action from model then execute that action
            #check what the action is. then check experimental condition.
            #for control, log the model's action and execute action from fixed policy
            #for experimental, log the model's action, then do it.
            if self.expGroup == 1:
                if (self.tries >= 3):
                    time.sleep(2)
                    self.next_question()
                    #self.resolve_pomdp()
                    #self.action = self.simple_pomdp_graph_policy_belief_runner.get_action()

                else:

                    #self.resolve_pomdp()
                    #self.action = self.simple_pomdp_graph_policy_belief_runner.get_action()
                    print "DURING QUESTION, model will give this action: " + str(
                        self.action)
                    # below, trying a time.sleep(2) because there is no lag when we don't re-solve the pomdp
                    time.sleep(2)  # let's try not sleeping here for MODEL GROUP since we have the pomdp re-solve time lag
                    if self.action == "no-action":
                        self.no_action()
                    elif self.action == "interactive-tutorial":
                        self.give_tutorial()
                    elif self.action == "worked-example":
                        self.give_example()
                    elif self.action == "hint":
                        self.give_hint()
                    elif self.action == "think-aloud":
                        self.give_think_aloud()
                    elif self.action == "break":
                        self.tic_tac_toe_break()
                    else:
                        print "error: model choosing action not in list"

            else:  #this is the block that executes for the fixed group.

                if (self.tries >= 3):
                    time.sleep(2)
                    self.next_question()

                else:
                    #self.give_tutorial()
                    #self.give_think_aloud()
                    #self.give_hint()
                    time.sleep(5)  #lets wait a little before giving help

                    if self.expGroup == 0:  #implement fixed policy
                        if take_break == "takebreak":
                            self.tic_tac_toe_break()

                        else:
                            if self.fixed_help_index == 0:
                                self.give_think_aloud()
                            elif self.fixed_help_index == 1:
                                self.give_hint()
                            elif self.fixed_help_index == 2:
                                self.give_example()
                            elif self.fixed_help_index >= 3:
                                self.give_tutorial()
                            else:
                                print "should not be happening"
                            self.fixed_help_index += 1

                    else:  #placeholder action selection for actual model
                        num = random.randint(0, 4)
                        time.sleep(3)  # let's wait a little before starting any help activity
                        if num == 0:
                            self.tic_tac_toe_break()
                        elif num == 1:
                            self.give_tutorial()
                        elif num == 2:
                            self.give_example()
                        elif num == 3:
                            self.give_hint()
                        else:
                            self.give_think_aloud()

        elif (data.msgType == "TICTACTOE-END"):  # here I respond to the end of a game by going to the same question
            #elif (data.msgType == "TICTACTOE-WIN" or data.msgType == "TICTACTOE-LOSS"):
            self.log_transaction("TICTACTOE-END", -1, "")
            self.repeat_question()

        elif ("SHOWEXAMPLE" in data.msgType):
            pass

        elif ('SHOWING-QUESTION' in data.msgType):
            question_id = self.questions[self.level][
                self.current_question]['QuestionID']
            self.log_transaction("QUESTION", question_id, self.level)
            #placeholder to get current action on first attempt (should be no-action)
            if self.expGroup == 1:
                #self.action = self.simple_pomdp_graph_policy_belief_runner.get_action()
                if self.tries == 0:
                    print "START OF NEW QUESTION, MODEL CHOSE ACTION: " + str(
                        self.action)
                    if self.action != "no-action":
                        if self.action == "interactive-tutorial":
                            self.give_tutorial()
                        elif self.action == "worked-example":
                            self.give_example()
                        elif self.action == "hint":
                            self.give_hint()
                        elif self.action == "think-aloud":
                            self.give_think_aloud()
                        elif self.action == "break":
                            self.tic_tac_toe_break()
                else:
                    print "MIDDLE OF A QUESTION, so not actually doing action here: " + str(
                        self.action)

        elif (data.msgType == 'START' or data.msgType == 'LOAD'):
            print "MODEL RECEIVED START MESSAGE FROM TABLET_MSG --------------> setting up session"
            self.inSession = True
            self.pid = int(data.questionNumOrPart)
            self.sessionNum = int(data.questionType)
            self.expGroup = int(data.robotSpeech)
            print "EXPGROUP IS: " + str(self.expGroup)
            self.difficultyGroup = int(data.otherInfo)

            fileString = rospack.get_path(
                'nao_tutoring_behaviors') + "/scripts/logfiles/" + "P" + str(
                    self.pid) + "_S" + str(self.sessionNum) + ".txt"
            print fileString
            if os.path.exists(fileString):
                self.logFile = open(fileString, "a")
                self.loadSession = 1
            else:
                self.logFile = open(fileString, "w+")
            self.logFile.write(
                "PARTICIPANT_ID,EXP_GROUP,SESSION_NUM,TIMESTAMP,QUESTION_NUM,TYPE,OTHER_INFO\n"
            )

            if self.sessionNum == 1:
                self.attempt_times = []
                self.total_num_questions = 0
                if self.difficultyGroup == 1:
                    print "harder difficulty group"
                    self.questions = self.harder_questions
                    self.level = 3
                else:
                    print "easier difficulty group"
                    self.level = 1

                saveFileString = rospack.get_path(
                    'nao_tutoring_behaviors'
                ) + "/scripts/logfiles/" + "P" + str(self.pid) + "_save.json"
                if os.path.exists(saveFileString):  # only if this file already exists are we loading a session that crashed
                    with open(saveFileString) as param_file:
                        params = json.load(param_file)

                    self.expGroup = int(params["expGroup"])
                    self.difficultyGroup = int(params["difficultyGroup"])
                    num_problems = int(params["numProblemsCompleted"])
                    self.total_num_questions = num_problems  #this tracks total number of q's over all sessions
                    self.attempt_times = params["attemptTimes"]
                    self.fixed_help_index = params["fixedHelpIndex"]
                    #self.current_question = params["currentQuestionIndex"]
                    if self.difficultyGroup == 1:
                        self.questions = self.harder_questions
                        self.level = (num_problems % 3) + 3
                    else:
                        self.level = (num_problems % 3) + 1
                    self.current_question = num_problems / 3

                    if self.expGroup == 1:
                        self.current_belief = np.array(params["currentBelief"])
                        self.action_prob_knowledge_gain_mult = params[
                            "action_prob_knowledge_gain_mult"]
                        self.action_prob_engagement_gain_mult = params[
                            "action_prob_engagement_gain_mult"]

                else:  #the param_save file does not exist so it is a new session.
                    if self.expGroup == 1:  #if expGroup 1, read the knowledge_start_state file to choose start dist for pomdp
                        startStateFile = rospack.get_path(
                            'nao_tutoring_behaviors'
                        ) + "/scripts/logfiles/initial_knowledge_states.json"
                        if os.path.exists(startStateFile):
                            with open(startStateFile) as start_state_file:
                                start_states = json.load(start_state_file)

                            self.initial_knowledge_state = start_states[str(
                                self.pid)]

            else:  #later sessions after session 1
                self.attempt_times = []
                saveFileString = rospack.get_path(
                    'nao_tutoring_behaviors'
                ) + "/scripts/logfiles/" + "P" + str(self.pid) + "_save.json"
                if os.path.exists(saveFileString):
                    with open(saveFileString) as param_file:
                        params = json.load(param_file)

                    self.expGroup = int(params["expGroup"])
                    self.difficultyGroup = int(params["difficultyGroup"])
                    num_problems = int(params["numProblemsCompleted"])
                    self.total_num_questions = num_problems  #this tracks total number of q's over all sessions
                    self.attempt_times = params["attemptTimes"]
                    self.fixed_help_index = params["fixedHelpIndex"]
                    #self.current_question = params["currentQuestionIndex"]
                    if self.difficultyGroup == 1:
                        self.questions = self.harder_questions
                        self.level = (num_problems % 3) + 3
                    else:
                        self.level = (num_problems % 3) + 1
                    self.current_question = num_problems / 3

                    if self.expGroup == 1:
                        self.current_belief = np.array(params["currentBelief"])
                        self.action_prob_knowledge_gain_mult = params[
                            "action_prob_knowledge_gain_mult"]
                        self.action_prob_engagement_gain_mult = params[
                            "action_prob_engagement_gain_mult"]
                else:
                    print "error: tried to open param save file when it didnt exist"
            #self.send_first_question()
            #time.sleep(3) #wait a bit before sending first question - do we need this?
            #self.first_question()
            #self.next_question() #aditi - trying this instead since send_first_question does not exist
            if self.expGroup == 1:
                self.setup_pomdp()

        elif (data.msgType == 'END'):
            self.inSession = False
            print "End of session - should try to save whatever info is needed to restart for next session"
            self.save_params()
            self.log_transaction("END", -1, "")
            self.logFile.flush()
            self.logFile.close()

    def save_params(self):
        saveFileString = rospack.get_path(
            'nao_tutoring_behaviors') + "/scripts/logfiles/" + "P" + str(
                self.pid) + "_save.json"
        self.save_file = open(saveFileString, "w+")
        num_problems_completed = self.total_num_questions
        save_help_index = self.fixed_help_index
        if self.fixed_help_index > 0:
            save_help_index = self.fixed_help_index - 1

        save_params = {
            "expGroup": self.expGroup,
            "difficultyGroup": self.difficultyGroup,
            "numProblemsCompleted": num_problems_completed,
            "attemptTimes": self.attempt_times,
            "fixedHelpIndex": save_help_index
        }
        if self.expGroup == 1:
            print self.current_belief
            if self.tries != 0:
                #if we end on an attempt in the middle of a problem, we want to change the belief state to start back in A0 states
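                # each block of 4 consecutive indices appears to correspond to the
                # attempt states of one (knowledge, engagement) pair; any mass on the
                # later attempt states is shifted back to index i + 1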
                for i in range(len(self.current_belief)):
                    if i % 4 == 0:
                        if self.current_belief[i + 2] != 0.0:
                            self.current_belief[i +
                                                1] = self.current_belief[i + 2]
                            self.current_belief[i + 2] = 0.0
                        elif self.current_belief[i + 3] != 0:
                            self.current_belief[i +
                                                1] = self.current_belief[i + 3]
                            self.current_belief[i + 3] = 0.0
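                # Within each 4-state attempt block (i, i+1, i+2, i+3), any
                # probability mass sitting at offsets 2 or 3 is folded back
                # into offset 1 before saving, so the next session does not
                # resume in the middle of a problem.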
            print self.current_belief
            save_params["currentBelief"] = self.current_belief.tolist()
            save_params[
                "action_prob_knowledge_gain_mult"] = self.action_prob_knowledge_gain_mult
            save_params[
                "action_prob_engagement_gain_mult"] = self.action_prob_engagement_gain_mult
        param_string = json.dumps(save_params, indent=4)
        self.save_file.write(param_string)
        self.save_file.flush()
        self.save_file.close()
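
    # The save file written above always contains expGroup, difficultyGroup,
    # numProblemsCompleted, attemptTimes and fixedHelpIndex; in the POMDP
    # condition (expGroup == 1) it also stores currentBelief and the two
    # action_prob_*_gain_mult values, which is what the session-restore code
    # reads back at the start of a later session.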

    def robot_msg_callback(self, data):
        # this model does nothing with robot messages, but it could do so here
        rospy.loginfo(
            rospy.get_caller_id() + " From Robot, I heard %s ", data)
        if (data.data == "INTRO-DONE"):
            self.first_question()

    def run(self):

        while not rospy.is_shutdown():
            try:
                # all of the real work happens in the ROS message callbacks;
                # sleep briefly so this loop does not spin at full CPU
                rospy.sleep(0.5)

            except rospy.ROSInterruptException:
                # sleep interrupted by shutdown; proceed to cleanup below
                break

            except KeyboardInterrupt:
                self.logFile.flush()
                self.logFile.close()
                if self.inSession:
                    self.save_params()
                #self.conn.close()
                #self.store_session(self.current_session)
                sys.exit(0)

        if self.inSession:
            print "saving params because app is done"
            self.save_params()
            self.logFile.flush()
            self.logFile.close()
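
A minimal, ROS-free sketch of the save/restore round trip used above. The file
name and all of the numbers are made up for illustration; the real code derives
the path from rospack and the participant id.

import json

save_params = {
    "expGroup": 1,
    "difficultyGroup": 1,
    "numProblemsCompleted": 7,
    "attemptTimes": [12.4, 30.1, 8.9],
    "fixedHelpIndex": 2,
}

# write the session summary at the end of a session
with open("P99_save.json", "w") as f:
    json.dump(save_params, f, indent=4)

# read it back at the start of the next session
with open("P99_save.json") as f:
    params = json.load(f)

num_problems = int(params["numProblemsCompleted"])
if int(params["difficultyGroup"]) == 1:
    level = (num_problems % 3) + 3   # harder question set
else:
    level = (num_problems % 3) + 1
current_question = num_problems // 3

print level, current_question        # prints "4 2" for these toy values
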
Example #7
0
    def setup_pomdp(self):
        param_file = rospack.get_path(
            'nao_tutoring_behaviors'
        ) + "/scripts/data/03_13_B.json"  #the param file that "works" for the base model
        with open(param_file) as data_file:
            params = json.load(data_file)

        # discount factor
        self.discount = params["discount"]

        # state variables
        knowledge_states = params["knowledge_states"]
        engagement_states = params["engagement_states"]
        attempt_states = params["attempt_states"]
        self.num_knowledge_levels = len(knowledge_states)
        self.num_engagement_levels = len(engagement_states)
        self.num_attempts = len(attempt_states)
        self.all_states = combine_states_to_one_list(knowledge_states,
                                                     engagement_states,
                                                     attempt_states)
        self.num_states = len(self.all_states)

        # starting distribution
        start = np.zeros(self.num_states)
        if self.sessionNum == 1 and self.loadSession == 0:
            initial_state = int(self.initial_knowledge_state[1:])
            majority_start = 0.7
            minority_start = (1.0 -
                              majority_start) / (self.num_knowledge_levels - 1)
            for i in range(self.num_knowledge_levels):
                #start[4 + i * 8] = 1.0 / float(self.num_knowledge_levels) #uniform start state
                if i == initial_state:
                    start[4 + i * 8] = majority_start
                else:
                    start[4 + i * 8] = minority_start
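            # The hard-coded 4 and 8 appear to assume 4 attempt states and 2
            # engagement levels (8 states per knowledge level), with offset 4
            # landing on the first attempt state of the higher engagement
            # level; if the param file changes those sizes, this indexing
            # needs to change with it.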

            self.action_prob_knowledge_gain_mult = params[
                "action_prob_knowledge_gain_mult"]
            self.action_prob_engagement_gain_mult = params[
                "action_prob_engagement_gain_mult"]

        else:
            # action_prob_knowledge_gain_mult and
            # action_prob_engagement_gain_mult should already have been
            # restored when the saved session parameters were loaded
            start = self.current_belief

        # probabilities associated with the transition matrix
        self.prob_knowledge_gain = params["prob_knowledge_gain"]
        self.prob_engagement_gain = params["prob_engagement_gain"]
        self.prob_engagement_loss = params["prob_engagement_loss"]
        self.prob_correct_answer = params["prob_correct_answer"]
        self.prob_correct_answer_after_1_attempt = params[
            "prob_correct_answer_after_1_attempt"]
        self.prob_drop_for_low_engagement = params[
            "prob_drop_for_low_engagement"]

        # actions
        self.actions = params["actions"]
        self.num_actions = len(self.actions)

        # action-related reward variables
        action_rewards = params["action_rewards"]
        engagement_reward = params["engagement_reward"]
        knowledge_reward = params["knowledge_reward"]
        end_state_remain_reward = params["end_state_remain_reward"]
        reward_for_first_attempt_actions = params[
            "reward_for_first_attempt_actions"]
        #action_prob_knowledge_gain_mult = params["action_prob_knowledge_gain_mult"]
        #self.action_prob_engagement_gain_mult = params["action_prob_engagement_gain_mult"]

        # observations
        correctness_obs = params["correctness_obs"]
        speed_obs = params["speed_obs"]
        self.all_obs = combine_obs_types_to_one_list(correctness_obs,
                                                     speed_obs)
        self.num_observations = len(self.all_obs)

        # observation related variables
        self.prob_speeds_for_low_engagement = params[
            "prob_speeds_for_low_engagement"]
        self.prob_speeds_for_high_engagement = params[
            "prob_speeds_for_high_engagement"]
        action_speed_multipliers = np.array(params["action_speed_multipliers"])

        self.R = generate_reward_matrix(
            actions=self.actions,
            action_rewards=action_rewards,
            engagement_reward=engagement_reward,
            knowledge_reward=knowledge_reward,
            end_state_remain_reward=end_state_remain_reward,
            num_knowledge_levels=self.num_knowledge_levels,
            num_engagement_levels=self.num_engagement_levels,
            num_attempts=self.num_attempts,
            num_observations=self.num_observations,
            reward_for_first_attempt_actions=reward_for_first_attempt_actions)

        self.T = generate_transition_matrix(
            num_knowledge_levels=self.num_knowledge_levels,
            num_engagement_levels=self.num_engagement_levels,
            num_attempts=self.num_attempts,
            prob_knowledge_gain=self.prob_knowledge_gain,
            prob_engagement_gain=self.prob_engagement_gain,
            prob_engagement_loss=self.prob_engagement_loss,
            action_prob_knowledge_gain_mult=self.
            action_prob_knowledge_gain_mult,
            action_prob_engagement_gain_mult=self.
            action_prob_engagement_gain_mult,
            prob_correct_answer=self.prob_correct_answer,
            prob_correct_answer_after_1_attempt=self.
            prob_correct_answer_after_1_attempt,
            prob_drop_for_low_engagement=self.prob_drop_for_low_engagement)

        self.O = generate_observation_matrix(
            knowledge_states=knowledge_states,
            engagement_states=engagement_states,
            attempt_states=attempt_states,
            correctness_obs=correctness_obs,
            speed_obs=speed_obs,
            num_actions=self.num_actions,
            prob_speeds_for_low_engagement=self.prob_speeds_for_low_engagement,
            prob_speeds_for_high_engagement=self.
            prob_speeds_for_high_engagement,
            action_speed_multipliers=action_speed_multipliers)

        #create POMDP model
        simple_pomdp = POMDP(self.T,
                             self.O,
                             self.R,
                             np.array(start),
                             self.discount,
                             states=self.all_states,
                             actions=self.actions,
                             observations=self.all_obs,
                             values='reward')

        self.simple_pomdp_graph_policy = simple_pomdp.solve(method='grid',
                                                            verbose=False,
                                                            n_iterations=500)

        self.simple_pomdp_graph_policy_belief_runner = GraphPolicyBeliefRunner(
            self.simple_pomdp_graph_policy, simple_pomdp)

        self.current_belief = self.simple_pomdp_graph_policy_belief_runner.current_belief  #should be the same as start?
        print "current belief is: "
        print self.current_belief
        self.action = self.simple_pomdp_graph_policy_belief_runner.get_action(
        )  #choose first action
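
For intuition about the start-distribution loop in setup_pomdp above, the
following ROS-free sketch reproduces it with made-up sizes (3 knowledge levels,
2 engagement levels, 4 attempt states, which is what the hard-coded 4 + i * 8
indexing appears to assume) and checks that the resulting belief sums to one.
All numbers here are illustrative, not values read from the real param file.

import numpy as np

num_knowledge_levels = 3     # toy sizes chosen to match the 4 + i * 8 indexing
num_engagement_levels = 2
num_attempts = 4
num_states = num_knowledge_levels * num_engagement_levels * num_attempts

initial_state = 1            # e.g. an initial_knowledge_state of "K1"
majority_start = 0.7
minority_start = (1.0 - majority_start) / (num_knowledge_levels - 1)

start = np.zeros(num_states)
for i in range(num_knowledge_levels):
    if i == initial_state:
        start[4 + i * 8] = majority_start
    else:
        start[4 + i * 8] = minority_start

print start.nonzero()[0]     # [ 4 12 20] -- one start state per knowledge level
print start.sum()            # 1.0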