예제 #1
0
                single_sample.append([s[0], prev_action])
            else:
                index = np.argmin(
                    [abs(act - prev_action) for act in abs_opt_policy[mcrst]])
                single_sample.append([s[0], abs_opt_policy[mcrst][index]])
        fictitious_samples.append(single_sample)
    return fictitious_samples


# Main DPO training loop: alternately sample the environment with the current
# deterministic policy (parameters A and B), build and solve the abstract MDP,
# and update both parameters from fictitious samples.
for i in range(0, N_ITERATION):
    # Collect trajectories by running the deterministic policy (a, b) in env.
    determin_samples = sampling_from_det_pol(env, N_EPISODES, N_STEPS,
                                             det_param_a, det_param_b)
    # Partition the samples into macrostates and estimate the abstract
    # transition function.
    abstraction.divide_samples(determin_samples, problem)
    abstraction.compute_abstract_tf(optA, ENV_NOISE)

    # Solve the abstract MDP to obtain the abstract optimal policy.
    abs_opt_pol = abs_updater.solve_mdp(abstraction.get_container())

    # Project the abstract optimal policy back onto the sampled states,
    # producing (state, action) pairs to regress the parameters toward.
    fictitious_samples = sampling_abstract_optimal_pol(abs_opt_pol,
                                                       determin_samples,
                                                       det_param_a,
                                                       det_param_b)
    # NOTE(review): the update of det_param_b uses the already-updated
    # det_param_a (sequential rather than simultaneous update) — confirm
    # this ordering is intended.
    det_param_a = det_upd.batch_gradient_update_a(det_param_a, det_param_b,
                                                  fictitious_samples)
    det_param_b = det_upd.batch_gradient_update_b(det_param_a, det_param_b,
                                                  fictitious_samples)
    # Monte-Carlo estimate of the performance measure J from the samples.
    estj = helper.estimate_J_from_samples(determin_samples, GAMMA)

    print("Updated deterministic policy parameter A: {}".format(det_param_a))
    print("Updated deterministic policy parameter B: {}".format(det_param_b))
    print("Updated estimated performance measure: {}\n".format(estj))
예제 #2
0
                else:
                    index = np.argmin([
                        abs(act - prev_action) for act in abs_opt_policy[mcrst]
                    ])
                    single_sample.append([s[0], abs_opt_policy[mcrst][index]])

        fictitious_samples.append(single_sample)
    return fictitious_samples


# Main DPO training loop: alternately sample the environment with the current
# deterministic policy, build and solve the abstract MDP, and update the
# single policy parameter from fictitious samples.
for i in range(0, N_ITERATION):
    # Collect trajectories by running the current deterministic policy in env.
    deterministic_samples = sampling_from_det_pol(env, N_EPISODES, N_STEPS,
                                                  det_param)
    # Partition the samples into macrostates and estimate the abstract
    # transition function.
    abstraction.divide_samples(deterministic_samples, problem)
    abstraction.compute_abstract_tf()
    # Solve the abstract MDP to obtain the abstract optimal policy.
    abstract_optimal_policy = abs_updater.solve_mdp(
        abstraction.get_container())

    # Project the abstract optimal policy back onto the sampled states,
    # producing (state, action) pairs to regress det_param toward.
    fictitious_samples = sampling_abstract_optimal_pol(abstract_optimal_policy,
                                                       deterministic_samples,
                                                       det_param)
    # One batch gradient step of the parameter toward the fictitious actions.
    det_param = det_upd.batch_gradient_update(det_param, fictitious_samples)
    # j = env.computeJ(det_param, ENV_NOISE, N_EPISODES)
    # Monte-Carlo estimate of the performance measure J from the samples.
    absj = helper.estimate_J_from_samples(deterministic_samples, GAMMA)

    print("Updated deterministic policy parameter: {}".format(det_param))
    print("Updated estimated performance measure: {}\n".format(absj))
    # NOTE(review): absj is passed twice — the exact-J slot appears to reuse
    # the sample estimate; confirm this is intended.
    visualizer.show_values(det_param, absj, absj)

visualizer.save_image()
예제 #3
0
def main(seed=None):
    """Run the DPO algorithm on the LQG1D environment.

    Alternates between (1) sampling the true environment with the current
    deterministic policy, (2) building and solving an abstract MDP on a
    fixed state discretization, and (3) updating the deterministic policy
    parameter from fictitious samples drawn under the abstract optimal
    policy. Per-iteration results are plotted, logged to TensorBoard and
    written to a CSV file.

    Parameters
    ----------
    seed : optional
        Random seed forwarded to Helper; None lets Helper choose one.

    Returns
    -------
    tuple
        (stats, opt_par4vis, optJ4vis): a dict of per-iteration statistics
        with keys 'param', 'j', 'sampleJ', 'abstractJ'; the rounded optimal
        parameter; and the rounded optimal performance measure.
    """
    # Named `hlp` so the builtin help() is not shadowed.
    hlp = Helper(seed)

    # load and configure the environment.
    env = gym.make('LQG1D-v0')
    env.sigma_noise = ENV_NOISE
    env.A = np.array([A]).reshape((1, 1))
    env.B = np.array([B]).reshape((1, 1))
    env.gamma = GAMMA
    env.seed(hlp.getSeed())

    # Fixed, evenly spaced macrostate intervals over the state space.
    INTERVALS = helper.get_constant_intervals(MIN_SPACE_VAL, MAX_SPACE_VAL,
                                              N_MCRST_DYN)
    print("INTERVALS: {}\n{}\n".format(N_MCRST_DYN, INTERVALS))

    # calculate the optimal values of the problem (used for visualization).
    opt_par4vis = round(env.computeOptimalK()[0][0], 3)
    det_param = INIT_DETERMINISTIC_PARAM
    optJ4vis = round(env.computeJ(env.computeOptimalK(), 0, N_EPISODES), 3)
    # logging.basicConfig(level=logging.DEBUG, filename='../test.log', filemode='w', format='%(message)s')

    filename = "../csv/lqg1d/DPO/data{}.csv".format(hlp.getSeed())
    data_file = open(filename, mode='w')
    file_writer = csv.writer(data_file,
                             delimiter=',',
                             quotechar='"',
                             quoting=csv.QUOTE_MINIMAL)

    # instantiate the components of the algorithm.
    # abstraction = LqgFKnown(A, B, GAMMA, SINK, INTERVALS)
    abstraction = LipschitzDeltaS(GAMMA, SINK, INTERVALS, A, B) if not STOCH else \
        MaxLikelihoodAbstraction(GAMMA, SINK, INTERVALS, B * STOCH_L_MULTIPLIER)

    # Abstract-MDP solver: IVI is used only in the deterministic, non-ds0 case.
    abs_updater = None
    if not STOCH:
        abs_updater = AbsUpdater(GAMMA, SINK, INTERVALS) if ds0 else IVI(
            GAMMA, SINK, True, INTERVALS)
    else:
        abs_updater = AbsUpdater(GAMMA, SINK, INTERVALS)
    det_upd = Updater(hlp.getSeed(), UPD_LAM)

    title = "A={}, B={}, Opt par={}, Opt J={}, Noise std dev={}".format(
        A, B, opt_par4vis, optJ4vis, ENV_NOISE)
    key = "{}_{}_{}_{}_{}".format(A, B, ENV_NOISE, det_param, hlp.getSeed())
    key = key.replace('.', ',')
    key = key + ".jpg"
    initJ = env.computeJ(det_param, 0, N_EPISODES)
    visualizer = Lqg1dVisualizer(title, key, det_param, opt_par4vis, initJ,
                                 optJ4vis)
    visualizer.clean_panels()

    # PLOTTER INFO
    stats = {}
    stats['param'] = []
    stats['j'] = []
    stats['sampleJ'] = []
    stats['abstractJ'] = []
    stats['param'].append(det_param)
    stats['j'].append(initJ)
    # ------------

    # TensorBoard writers: per-macrostate min/max sampled action and the
    # abstract optimal action, tracked across iterations.
    writer_min = SummaryWriter('runs/min')
    writer_max = SummaryWriter('runs/max')
    writer_opt = SummaryWriter('runs/opt')

    for i in range(0, N_ITERATION):
        determin_samples = sampling_from_det_pol(env, N_EPISODES, N_STEPS,
                                                 det_param)
        # dyn_intervals = helper.build_mcrst_from_samples(determin_samples, N_MCRST_DYN, MIN_SPACE_VAL, MAX_SPACE_VAL)
        dyn_intervals = None
        abstraction.divide_samples(determin_samples,
                                   problem,
                                   hlp.getSeed(),
                                   intervals=dyn_intervals)
        abstraction.compute_abstract_tf(ds0, ENV_NOISE)

        abs_opt_pol = abs_updater.solve_mdp(abstraction.get_container(),
                                            intervals=dyn_intervals)

        # tensorboard
        for mcrst, ap in enumerate(abs_opt_pol):
            # If several optimal actions exist for a macrostate, log the first.
            if len(ap) > 1:
                ap = ap[0]
            writer_opt.add_scalar('mcrst{}'.format(mcrst), ap, i)
        for mcrst, cont in enumerate(abstraction.get_container()):
            writer_min.add_scalar('mcrst{}'.format(mcrst), min(cont.keys()), i)
            writer_max.add_scalar('mcrst{}'.format(mcrst), max(cont.keys()), i)

        # ---- performance abstract policy ---
        # Evaluate the abstract policy starting from each episode's first state.
        first_states_ep = [d[0][0] for d in determin_samples]
        absJ = estimate_performance_abstract_policy(env, N_EPISODES, N_STEPS,
                                                    abs_opt_pol,
                                                    first_states_ep,
                                                    dyn_intervals, INTERVALS)
        # ------------------------------------

        fictitious_samples = sampling_abstract_optimal_pol(
            abs_opt_pol, determin_samples, det_param, dyn_intervals, INTERVALS)
        det_param = det_upd.batch_gradient_update(det_param,
                                                  fictitious_samples)

        # Exact and sample-estimated performance of the updated parameter.
        j = env.computeJ(det_param, 0, N_EPISODES)
        estj = helper.estimate_J_from_samples(determin_samples, GAMMA)

        print("{} - Updated deterministic policy parameter: {}".format(
            i, det_param))
        print("Updated performance measure: {}".format(j))
        print("Updated estimated performance measure: {}\n".format(estj))
        visualizer.show_values(det_param, j, estj, absJ)

        file_writer.writerow([det_param, j, estj, absJ])

        # PLOTTER INFO
        stats['param'].append(det_param)
        stats['j'].append(j)
        stats['sampleJ'].append(estj)
        stats['abstractJ'].append(absJ)
        # ------------

    visualizer.save_image()
    writer_min.close()
    writer_max.close()
    writer_opt.close()
    # Close the CSV output so buffered rows are flushed (the file was
    # previously left open — resource leak).
    data_file.close()
    return stats, opt_par4vis, optJ4vis
예제 #4
0
def main(seed=None, alpha=0.001, lam=0.0005):
    """Run DPO with an RBF-network policy on the ComplexMiniGolf environment.

    Alternates between (1) sampling the environment with the current RBF
    policy, (2) building and solving an abstract MDP on a fixed state
    discretization, and (3) fitting the RBF network to fictitious samples
    drawn under the abstract optimal policy. Per-iteration results are
    plotted and written to a CSV file; extra per-macrostate CSV dumps are
    produced at selected iterations.

    Parameters
    ----------
    seed : optional
        Random seed forwarded to Helper; None lets Helper choose one.
    alpha : float
        Step-size passed to the RBF network (default 0.001).
    lam : float
        Regularization coefficient passed to the RBF network (default 0.0005).

    Returns
    -------
    tuple
        (stats, cumulative_j): a dict of per-iteration statistics with keys
        'w1'..'w4', 'j', 'fail'; and the sum of the estimated performance
        over all iterations.
    """
    # Named `hlp` so the builtin help() is not shadowed.
    hlp = Helper(seed)

    # load and configure the environment.
    env = gym.make('ComplexMiniGolf-v0')
    env.sigma_noise = ENV_NOISE
    env.gamma = GAMMA
    env.seed(hlp.getSeed())

    # logging.basicConfig(level=logging.DEBUG, filename='../../test.log', filemode='w', format='%(message)s')
    cumulative_fail = 0
    cumulative_j = 0

    filename = "../csv/minigolf/friction1.9/DPO/ALPHA={}/LAM={}/data{}.csv".format(
        alpha, lam, hlp.getSeed())
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    data_file = open(filename, mode='w')
    file_writer = csv.writer(data_file,
                             delimiter=',',
                             quotechar='"',
                             quoting=csv.QUOTE_MINIMAL)

    # RBF network representing the deterministic policy.
    rbf = RBFNet(CENTERS, STD_DEV, INIT_W, hlp.getSeed(), alpha, lam)
    # rbf = RBFNet([3, 6, 10, 14, 17], [0.1, 0.3, 0.5, 0.7, 1])
    # rbf = RBFNet([3, 6, 10, 14, 17], [0.49, 0.63, 0.79, 0.95, 1.33], hlp.getSeed())
    visualizer = MGVisualizer(
        "MG visualizer", "minigolf/DPO/ALPHA={}/LAM={}/test{}.png".format(
            alpha, lam, hlp.getSeed()))
    visualizer.clean_panels()

    # PLOTTER INFO
    stats = {}
    stats['w1'] = []
    stats['w2'] = []
    stats['w3'] = []
    stats['w4'] = []
    stats['j'] = []
    stats['fail'] = []
    # ------------

    for i in range(0, N_ITERATION):

        determin_samples = sampling_from_det_pol(env, N_EPISODES, N_STEPS, rbf)
        INTERVALS = helper.get_constant_intervals([MIN_SPACE_VAL],
                                                  [MAX_SPACE_VAL],
                                                  [N_MCRST_DYN])[0]
        # dyn_intervals = helper.build_mcrst_from_samples(determin_samples, N_MCRST_DYN, MIN_SPACE_VAL, MAX_SPACE_VAL)
        # Prepend the [-4, 0] macrostate for states behind the start.
        INTERVALS = [[-4, 0]] + INTERVALS
        dyn_intervals = None

        # Abstraction and solver are created once on the first iteration and
        # reused (they accumulate state) across the remaining ones.
        if i == 0:
            abstraction = LipschitzDeltaS(
                GAMMA, SINK, INTERVALS) if ds0 else LipschitzDeltaS(
                    GAMMA, SINK, INTERVALS, 1.3, 0.9)
            # abstraction = MaxLikelihoodAbstraction(GAMMA, SINK, INTERVALS, 5.5)
            abs_updater = AbsUpdater(GAMMA, SINK, INTERVALS, -100) if ds0 and LDELTAS == 0 else \
                IVI(GAMMA, SINK, True, INTERVALS)
            # abs_updater = AbsUpdater(GAMMA, SINK, INTERVALS, 0)

        abstraction.divide_samples(determin_samples,
                                   problem,
                                   hlp.getSeed(),
                                   intervals=dyn_intervals)
        abstraction.compute_abstract_tf(ds0, LDELTAS)

        abs_opt_pol = abs_updater.solve_mdp(abstraction.get_container(),
                                            intervals=dyn_intervals)

        # Project the abstract optimal policy back onto the sampled states
        # and fit the RBF network to the resulting (state, action) pairs.
        fictitious_samples = sampling_abstract_optimal_pol(
            abs_opt_pol, determin_samples, rbf, INTERVALS)
        fictitious_samples = helper.flat_listoflists(fictitious_samples)
        X = [f[0] for f in fictitious_samples]
        y = [f[1] for f in fictitious_samples]
        # X = np.reshape([f[0] for f in fictitious_samples], (len(fictitious_samples),))
        # y = np.reshape([f[1] for f in fictitious_samples], (len(fictitious_samples),))
        rbf.fit(X, y)
        estj = helper.estimate_J_from_samples(determin_samples, GAMMA)

        cumulative_j += estj
        print("Iteration n.{}".format(i))
        print("W: {}".format(rbf.w))
        print("Updated estimated performance measure: {}".format(estj))
        # Count zero-reward steps and big-penalty ("fail") steps in the batch.
        zeros, hundred, failing_states = helper.minigolf_reward_counter(
            determin_samples)
        print("Number of zeroes: {} - Number of big penalties: {}".format(
            zeros, hundred))
        print("Failing states: {}".format(failing_states))
        cumulative_fail += hundred
        print("Cumulative fails: {}\n".format(cumulative_fail))

        # actions = [m.keys() for m in abstraction.get_container()]
        # action_range = [max(a) - min(a) if len(a) > 0 else 0 for a in actions]
        # intervals = dyn_intervals if dyn_intervals is not None else INTERVALS
        # [print("Mcrst = {}, diameter = {}, action range = {}".format(dyn, dyn[1] - dyn[0], ran)) for dyn, ran in
        #     zip(intervals, action_range)]
        # print("\n")

        w = rbf.w
        visualizer.show_values(w, estj, cumulative_fail)
        file_writer.writerow([w[0], w[1], w[2], w[3], cumulative_fail, estj])

        # --- APPENDIX E ---
        # At selected iterations, dump per-macrostate action ranges and the
        # current weights for the paper's appendix.
        if i in (0, 99, 199, 299, 399, 499):
            filename2 = "../csv/minigolf/appendix/ALPHA={}/LAM={}/it{}/data{}.csv".format(
                alpha, lam, i, hlp.getSeed())
            os.makedirs(os.path.dirname(filename2), exist_ok=True)
            data_file2 = open(filename2, mode='w')
            file_writer2 = csv.writer(data_file2,
                                      delimiter=',',
                                      quotechar='"',
                                      quoting=csv.QUOTE_MINIMAL)
            file_writer2.writerow([
                'mcrst', 'min_a', 'max_a', 'min_opt_a', 'max_opt_a', 'w1',
                'w2', 'w3', 'w4'
            ])
            # Skip the first and last macrostates (boundary/sink intervals).
            for j in range(1, len(abstraction.get_container()) - 1):
                actions = abstraction.get_container()[j].keys()
                w = rbf.w
                file_writer2.writerow([
                    j,
                    min(actions),
                    max(actions),
                    min(abs_opt_pol[j]),
                    max(abs_opt_pol[j]), w[0], w[1], w[2], w[3]
                ])
            data_file2.close()
        # ------------------

        # PLOTTER INFO
        # if i % 10 == 0:
        stats['w1'].append(w[0])
        stats['w2'].append(w[1])
        stats['w3'].append(w[2])
        stats['w4'].append(w[3])
        stats['j'].append(estj)
        stats['fail'].append(cumulative_fail)
        # ------------

    visualizer.save_image()
    # Close the CSV output so buffered rows are flushed (the file was
    # previously left open — resource leak; note data_file2 was closed).
    data_file.close()
    return stats, cumulative_j