def pomdp_choose_action( pomdp, Q, belief_for_policy, action_code_list ):
    """Choose the next machine action and decide whether to reset the belief.

    The action index comes from ``tbh_pomdp_util.solve_qmdp`` and is mapped
    to an action code through ``action_code_list``.  The chosen action code
    is then handed to ``determine_reset_belief``; when that returns a truthy
    value the outgoing belief is replaced by
    ``tbh_pomdp_util.reset_belief( pomdp )``, otherwise the incoming belief
    is passed through untouched.

    NOTE(review): a later definition of ``pomdp_choose_action`` in this file
    rebinds the name, so this version appears to be shadowed once the module
    is loaded -- confirm which one is intended to be live.

    Args:
        pomdp: Model object forwarded to the ``tbh_pomdp_util`` helpers.
        Q: Forwarded to ``solve_qmdp`` (presumably the Q-value table --
            confirm against ``tbh_pomdp_util``).
        belief_for_policy: Belief used to select the action.
        action_code_list: Sequence indexed by the value ``solve_qmdp``
            returns.

    Returns:
        dict with keys ``'belief_for_policy'``, ``'final_belief'``,
        ``'new_machine_action'`` and ``'reset_belief'``.
    """
    action_index = tbh_pomdp_util.solve_qmdp( belief_for_policy, pomdp, Q )
    chosen_action = action_code_list[action_index]

    # Determine if we need to reset the policy; the helper is only invoked
    # for its verdict on the action that was just selected.
    should_reset = determine_reset_belief( chosen_action )
    outgoing_belief = (
        tbh_pomdp_util.reset_belief( pomdp )
        if should_reset
        else belief_for_policy
    )

    return {
        'belief_for_policy': belief_for_policy,
        'final_belief': outgoing_belief,
        'new_machine_action': chosen_action,
        'reset_belief': should_reset,
    }
def pomdp_choose_action( pomdp, Q, belief_for_policy, action_code_list ):
    """Choose the next machine action; the belief is never reset here.

    The action index comes from ``tbh_pomdp_util.solve_qmdp`` and is mapped
    to an action code through ``action_code_list``.  The chosen action is
    printed to stdout for debugging.

    In this version the belief-reset check is disabled: ``'final_belief'``
    is always the incoming ``belief_for_policy`` and ``'reset_belief'`` is
    always ``False``.  ``pomdp`` is therefore only used for the
    ``solve_qmdp`` call.

    NOTE(review): this definition rebinds a ``pomdp_choose_action`` defined
    earlier in the same file (that one does perform the reset check) --
    confirm the override is intentional.

    Args:
        pomdp: Model object forwarded to ``tbh_pomdp_util.solve_qmdp``.
        Q: Forwarded to ``solve_qmdp`` (presumably the Q-value table --
            confirm against ``tbh_pomdp_util``).
        belief_for_policy: Belief used to select the action.
        action_code_list: Sequence indexed by the value ``solve_qmdp``
            returns.

    Returns:
        dict with keys ``'belief_for_policy'``, ``'final_belief'``,
        ``'new_machine_action'`` and ``'reset_belief'``.
    """
    action_index = tbh_pomdp_util.solve_qmdp( belief_for_policy, pomdp, Q )
    chosen_action = action_code_list[action_index]

    # Single-argument parenthesised form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print("NEW MACHINE ACTION")
    print(chosen_action)

    # Belief reset is switched off: pass the belief through unchanged.
    return {
        'belief_for_policy': belief_for_policy,
        'final_belief': belief_for_policy,
        'new_machine_action': chosen_action,
        'reset_belief': False,
    }