# Run n convergence experiments with the L_RP automaton, steering the
# receiver to the current best depth estimate whenever it changes.
lrp.b = 0.5
bestdepth = np.zeros(num_actions)
bestdepth1 = np.zeros(num_actions)  # NOTE(review): unused in this section — presumably used later in the file; kept.
current_best = 0
current_best1 = 0  # NOTE(review): unused in this section — presumably used later in the file; kept.
for j in range(n):
    # reset the action probabilities.
    # lrp.reset_actions()
    count = 0
    # lrp.b = tune.find_optimal_b(lrp, env, det_obj)
    # Run a single experiment. Terminate if it reaches 10000 iterations.
    while count < 10000:  # was `while(True and count < 10000)`; the `True and` was redundant
        # Define m as the next action predicting the depth of the object.
        m = lrp.next_action()
        # Define req as the next detectable object depth.
        req = det_obj.request()
        # reward if m == req (env.response presumably returns falsy on a match — TODO confirm).
        resp = env.response(m, req)
        if not resp:
            lrp.do_reward(m)
        else:
            lrp.do_penalty(m)
        if max(lrp.p) > 0.999:
            # The best depth counting from 0.
            # Break at 99.9% convergence to a single depth (the old comment
            # said 98%, but the threshold actually applied is 0.999).
            bestdepth[np.argmax(lrp.p)] += 1
            break
        count += 1
    # Re-aim the receiver whenever the leading depth estimate changes.
    if current_best != np.argmax(bestdepth):
        receiver.goto(-100, depths[np.argmax(bestdepth)])
        current_best = np.argmax(bestdepth)
import helpers as h
import math

# Monte-Carlo convergence study for the discretized L_RI (DLRI) automaton:
# 5 runs of 1000 experiments each, tallying which depth the automaton
# settles on and reporting the empirical convergence distribution.
num_actions = 6
env = Environment(num_actions)
dlri = DLRI(num_actions)
bestdepth = np.zeros(num_actions)
E = [0.1, 0.2, 0.4, 0.2, 0.01, 0.09]
det_obj = Pinger(E)

for run in range(5):
    for trial in range(1000):
        # Caught me again...
        # Re-seed the discretized probability vector for a fresh experiment.
        dlri.p = np.array(h.make_dp(num_actions))
        # Start each experiment from the middle depth.
        action = math.floor(num_actions / 2)
        while True:
            target = det_obj.request()
            penalized = env.response(action, target)
            if not penalized:
                dlri.do_reward(action)
            else:
                dlri.do_penalty()
            action = dlri.next_action()
            # Converged once a single action holds the full discretized mass.
            if max(dlri.p) == num_actions * num_actions:
                # The best depth counting from 0 (seasurface).
                bestdepth[np.argmax(dlri.p)] += 1
                break
    # print("The best depth tally is : " + str(bestdepth))
    print("Converge on depth: " + str(np.argmax(bestdepth)))
    print("The probability vector is: " + str(bestdepth / sum(bestdepth)))
# Tune the L_RP reward/penalty parameters, then run n convergence
# experiments and compare the empirical distribution with the target.
a = tune.find_optimal_a(test_lrp, env, penalizer)
print("The value for a after tuning is " + str(test_lrp.a))
b = tune.find_optimal_b(test_lrp, env, penalizer)
print("The value for b after tuning is " + str(test_lrp.b))
test_lrp.a = a
test_lrp.b = b
n = 10000
bestdepth = np.zeros(5)
for j in range(n):
    # reset the action probabilities.
    test_lrp.reset_actions()
    # Run a single experiment. Terminate if it reaches 10000 iterations.
    # FIX: the original looped `while (True)` with no cap, so an experiment
    # that never reached 98% convergence would hang forever; the guard below
    # enforces the 10000-iteration limit the comment already promised (and
    # matches the sibling LRP experiment elsewhere in this file).
    count = 0
    while count < 10000:
        # Define m as the next action predicting the depth of the object.
        m = test_lrp.next_action()
        # Define req as the next detectable object depth.
        req = penalizer.request()
        # reward if m == req (env.response presumably returns falsy on a match — TODO confirm).
        resp = env.response(m, req)
        if not resp:
            test_lrp.do_reward(m)
        else:
            test_lrp.do_penalty(m)
        if max(test_lrp.p) > 0.98:
            # The best depth counting from 0.
            # Break at 98% convergence to a single depth.
            bestdepth[np.argmax(test_lrp.p)] += 1
            break
        count += 1
# NOTE(review): "penaly_probs" looks like a typo for "penalty_probs", but it
# must be defined elsewhere in this file — verify before renaming.
print("The desired probability vector is: " + str(penaly_probs))
print("The actual probability vector is: " + str(bestdepth / sum(bestdepth)))