lrp.b = 0.5
bestdepth = np.zeros(num_actions)
bestdepth1 = np.zeros(num_actions)
current_best = 0
current_best1 = 0
for j in range(n):
    # reset the action probabilities.
    # lrp.reset_actions()
    count = 0
    # lrp.b = tune.find_optimal_b(lrp, env, det_obj)
    # Run a single experiment. Terminate if it reaches 10000 iterations.
    while (count < 10000):
        # Define m as the next action predicting the depth of the object.
        m = lrp.next_action()
        # Define req as the next detectable object depth.
        req = det_obj.request()
        # reward if m = req.
        resp = env.response(m, req)
        if (not resp):
            lrp.do_reward(m)
        else:
            lrp.do_penalty(m)
        if (max(lrp.p) > 0.999):
            # The best depth counting from 0.
            # Break at 99.9% convergence to a single depth.
            bestdepth[np.argmax(lrp.p)] += 1
            break
        count += 1
    # Move the receiver to the new best depth whenever it changes.
    if (current_best != np.argmax(bestdepth)):
        receiver.goto(-100, depths[np.argmax(bestdepth)])
        current_best = np.argmax(bestdepth)
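The loop above exercises an L_RP-style learning automaton through lrp.next_action(), lrp.do_reward(m), lrp.do_penalty(m), and the probability vector lrp.p, but the class itself is not shown. A minimal sketch of such an automaton, assuming the standard linear reward-penalty update (the class name, constructor arguments, and defaults here are assumptions, not the original code):

import numpy as np

class LRP:
    """Sketch of a linear reward-penalty (L_RP) automaton."""

    def __init__(self, num_actions, a=0.1, b=0.05):
        self.num_actions = num_actions
        self.a = a  # reward learning rate
        self.b = b  # penalty learning rate
        self.reset_actions()

    def reset_actions(self):
        # Start from a uniform action-probability vector.
        self.p = np.full(self.num_actions, 1.0 / self.num_actions)

    def next_action(self):
        # Sample an action index according to the current probabilities.
        return np.random.choice(self.num_actions, p=self.p)

    def do_reward(self, m):
        # Reward: shift probability mass toward action m by a factor of a.
        self.p = (1 - self.a) * self.p
        self.p[m] += self.a

    def do_penalty(self, m):
        # Penalty: shift probability mass away from action m by a factor of b.
        old_pm = self.p[m]
        self.p = self.b / (self.num_actions - 1) + (1 - self.b) * self.p
        self.p[m] = (1 - self.b) * old_pm

With b = 0 this scheme degenerates to reward-inaction; the fragment above instead fixes b at 0.5 and leaves a to whatever the surrounding code chose.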
Example #2
import math
import numpy as np

import helpers as h
# Environment, DLRI, and Pinger are provided by the surrounding project (not shown here).

num_actions = 6

env = Environment(num_actions)
dlri = DLRI(num_actions)
bestdepth = np.zeros(num_actions)
E = [0.1, 0.2, 0.4, 0.2, 0.01, 0.09]
det_obj = Pinger(E)
for k in range(5):
    for j in range(1000):
        # Reset the discretized action probabilities before each experiment.
        dlri.p = np.array(h.make_dp(num_actions))
        m = math.floor(num_actions / 2)
        while (True):
            req = det_obj.request()
            resp = env.response(m, req)
            if (not resp):
                dlri.do_reward(m)
            else:
                dlri.do_penalty()
            m = dlri.next_action()
            if (max(dlri.p) == (num_actions * num_actions)):
                # The best depth counting from 0 (sea surface).
                bestdepth[np.argmax(dlri.p)] += 1
                break
    # print("The best depth tally is : " + str(bestdepth))
    print("Converge on depth: " + str(np.argmax(bestdepth)))
    print("The probability vector is: " + str(bestdepth / sum(bestdepth)))
a = tune.find_optimal_a(test_lrp, env, penalizer)
print("The value for a after tuning is " + str(a))
b = tune.find_optimal_b(test_lrp, env, penalizer)
print("The value for b after tuning is " + str(b))
test_lrp.a = a
test_lrp.b = b
n = 10000
bestdepth = np.zeros(5)
for j in range(n):
    # reset the action probabilities.
    test_lrp.reset_actions()
    count = 0
    # Run a single experiment. Terminate if it reaches 10000 iterations.
    while (count < 10000):
        # Define m as the next action predicting the depth of the object.
        m = test_lrp.next_action()
        # Define req as the next detectable object depth.
        req = penalizer.request()
        # reward if m = req.
        resp = env.response(m, req)
        if (not resp):
            test_lrp.do_reward(m)
        else:
            test_lrp.do_penalty(m)
        if (max(test_lrp.p) > 0.98):
            # The best depth counting from 0.
            # Break at 98% convergence to a single depth.
            bestdepth[np.argmax(test_lrp.p)] += 1
            break
        count += 1
print("The desired probability vector is: " + str(penaly_probs))
print("The actual probability vector is: " + str(bestdepth / sum(bestdepth)))