Esempio n. 1
0
def main():
    """Benchmark constant predictors and the Q predictor on a Sinus environment.

    One ``PredictorConst`` is created per available step size, followed by a
    single ``PredictorQ`` built from the value model stored under
    ``'predictor'``.  All of them are passed to ``benchmark`` together with an
    ``IntegratorLinReg`` integrator, and the returned scores divided by the
    number of episodes are printed.
    """
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    dim_state = 3
    dim_action = len(step_sizes)
    num_episodes = 500

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.1,
        step_sizes=step_sizes,
        error_tol=0.0005,
        nodes_per_integ=dim_state,
    )

    # Baselines first (one per action index), then the learned predictor last.
    constant_predictors = [PredictorConst(idx) for idx in range(dim_action)]
    value_model = build_value_model(dim_state=dim_state,
                                    dim_action=dim_action,
                                    filename='predictor')
    learned_predictor = PredictorQ(value_model, load('scaler.bin'))
    predictors = constant_predictors + [learned_predictor]

    # Alternative integrator used previously for this benchmark: Simpson().
    integrator = IntegratorLinReg(step_sizes,
                                  load('linreg_models.bin'),
                                  load('scaler.bin'))

    scores = benchmark(predictors, integrator, num_episodes, env)
    print(scores / num_episodes)
Esempio n. 2
0
def main():
    """Train an ``Estimator`` using a Q predictor and a Simpson integrator.

    Sets up a Sinus ``IntegrationEnv``, a ``PredictorQ`` with a freshly built
    value model (``filename=None``), and an ``Estimator`` whose threshold is
    100 times the environment's error tolerance, then hands everything to
    ``train_model`` for 5000 iterations/episodes (the exact meaning of that
    argument is defined by ``train_model``).
    """
    # Other step-size grids tried previously:
    #   [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    #   [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    dim_state = 3
    dim_action = len(step_sizes)
    memory = 1
    error_tol = 7.5e-6
    smallest_step, largest_step = step_sizes[0], step_sizes[-1]

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.075,
        error_tol=error_tol,
        nodes_per_integ=dim_state,
        memory=memory,
        x0=0,
        max_dist=20,
        step_size_range=(smallest_step, largest_step),
    )

    # NOTE(review): the estimator and the predictor use scalers loaded from
    # two different files -- confirm this is intended.
    scaler = load('scaler.bin')

    value_model = build_value_model(dim_state=dim_state,
                                    dim_action=dim_action,
                                    filename=None,
                                    lr=0.00001,
                                    memory=memory)
    predictor = PredictorQ(
        step_sizes=step_sizes,
        model=value_model,
        scaler=load('model_quad/model_sinus/Simpson/scaler.bin'),
    )
    integrator = Simpson()

    estimator_model = build_estimator_model(dim_state,
                                            lr=0.0001,
                                            filename='estimator')
    estimator = Estimator(estimator_model, scaler, threshold=100 * error_tol)

    train_model(estimator, env, predictor, integrator, 5000, scaler)
def one_fun_boole():
    """Compare the learned predictor (Boole integrator) with classic rules.

    The predictor integrates a Sinus function starting from 0 towards 10 via
    ``integrate_env``, which returns the number of evaluations, the actual end
    point and the per-step errors.  ``BoolesRule`` and ``Simps`` are then run
    on the same interval with that number of evaluations, and ``Romberg`` with
    a fixed tolerance.  For every method the summed and mean step errors and
    the evaluation count are printed and the method's plot is shown.
    """
    x_start = 0
    requested_end = 10
    # Alternative grid tried previously:
    #   [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    step_sizes = [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    dim_state = 5
    dim_action = len(step_sizes)
    memory = 1

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.1,
        step_sizes=step_sizes,
        error_tol=0.000001,
        memory=memory,
        nodes_per_integ=dim_state,
    )
    value_model = build_value_model(dim_state=dim_state,
                                    dim_action=dim_action,
                                    filename='predictor',
                                    memory=memory)
    predictor = PredictorQ(value_model, load('scaler_boole_mem1.bin'))
    # An IntegratorLinReg was used here previously instead of Boole().
    integrator = Boole()

    # --- learned predictor -------------------------------------------------
    # integrate_env reports the end point it actually reached; every
    # reference method below integrates up to that point.
    _, evals, x_end, step_errors = integrate_env(predictor,
                                                 integrator,
                                                 env,
                                                 x_start,
                                                 requested_end,
                                                 plot=True)
    print('new x1: {}'.format(x_end))
    print('Predictor error total: {}'.format(np.sum(step_errors)))
    print('Predictor error per step: {}'.format(np.mean(step_errors)))
    print('Predictor evals: {}'.format(evals))
    print('')

    # --- Boole's rule with the same number of evaluations ------------------
    env.reset(reset_params=False)
    booles = BoolesRule(env.fun, x_start, x_end)
    _, step_errors = booles(num_evals=evals, stepwise_error=True)
    print('Boole error total: {}'.format(np.sum(step_errors)))
    print('Boole error per step: {}'.format(np.mean(step_errors)))
    print('Boole evals: {}'.format(booles.evals))
    print('')
    booles.plot()

    # --- Simpson's rule with the same number of evaluations ----------------
    env.reset(reset_params=False)
    simps = Simps(env.fun, x_start, x_end)
    _, step_errors = simps(num_evals=evals, stepwise_error=True)
    print('Simpson error total: {}'.format(np.sum(step_errors)))
    print('Simpson error per step: {}'.format(np.mean(step_errors)))
    print('Simpson evals: {}'.format(simps.evals))
    print('')
    simps.plot()

    # --- Romberg with a fixed tolerance ------------------------------------
    env.reset(reset_params=False)
    rom = Romberg(env.fun, x_start, x_end, tol=0.0005, order=3)
    _, step_errors = rom(0.15, stepwise_errors=True)
    print('Romberg error total: {}'.format(np.sum(step_errors)))
    print('Romberg error per step: {}'.format(np.mean(step_errors)))
    print('Romberg evals: {}'.format(rom.evals))
    rom.plot()
Esempio n. 4
0
def test_sinus():
    """Tune the Romberg tolerance on a Sinus function and report the result.

    Romberg integration is repeated up to 10 times on the interval
    ``[x0, x1]``.  After each run the mean of the returned step errors is
    printed; if it is below 0.0001 the tolerance is doubled, if it is above
    0.0005 the tolerance is divided by 3, otherwise the search stops.
    Afterwards the evaluation count of the last run and the global error
    against the exact integral over the same interval are printed, and the
    last run is plotted.
    """
    f = Sinus()
    # Single source of truth for the interval: the original integrated over
    # [0, 10] but measured the global error against the exact integral over
    # [0, 20], so the reported error compared two different quantities.
    x0, x1 = 0, 10
    tol = 0.0005

    # range(10) is never empty, so rom / integ_rom / evals_rom_step are always
    # bound once the loop has finished.
    for _ in range(10):
        rom = Romberg(f, x0, x1, tol=tol, order=6)
        integ_rom, errors = rom(0.15, True)
        error_rom_step = np.mean(errors)
        print(error_rom_step)
        evals_rom_step = rom.evals
        if error_rom_step < 0.0001:
            # More accurate than required: loosen the tolerance.
            tol *= 2.0
        elif error_rom_step > 0.0005:
            # Not accurate enough: tighten the tolerance.
            tol /= 3.0
        else:
            break

    print('evaluations: {}'.format(evals_rom_step))
    print('global error: {}'.format(abs(integ_rom - f.integral(x0, x1))))

    rom.plot()
Esempio n. 5
0
def save_scaler():
    """Fit a ``StandardScaler`` on sampled states and store it on disk.

    50000 states are drawn from a Sinus ``IntegrationEnv`` via
    ``sample_states``; the fitted scaler is written, compressed, to
    ``'scaler_mem1.bin'``.
    """
    # Other step-size grids tried previously:
    #   [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    #   [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    step_bounds = (step_sizes[0], step_sizes[-1])

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.075,
        error_tol=7.5e-6,
        nodes_per_integ=3,
        memory=1,
        x0=-1,
        max_dist=2,
        step_size_range=step_bounds,
    )

    # Build the scaler from a sample of environment states.
    sampled_states = env.sample_states(50000)
    scaler = StandardScaler()
    scaler.fit(sampled_states)
    dump(scaler, 'scaler_mem1.bin', compress=True)
Esempio n. 6
0
def one_fun():
    """Compare the learned predictor with adaptive and plain Simpson rules.

    The predictor integrates a Sinus function from 0.0 towards 10.0 with a
    ``Simpson`` integrator.  ``integrate_env`` returns the integral, the number
    of evaluations and the end point actually reached.  ``AdaptSimpsConstEvals``
    and ``Simps`` are then run on that interval with the same number of
    evaluations.  For each method the absolute error against
    ``env.fun.integral`` and the evaluation count are printed.
    """
    x_start = 0.0
    requested_end = 10.0
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    dim_state = 3
    dim_action = len(step_sizes)

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.15,
        step_sizes=step_sizes,
        error_tol=0.0005,
    )
    value_model = build_value_model(dim_state=dim_state,
                                    dim_action=dim_action,
                                    filename='predictor')
    predictor = PredictorQ(value_model, load('scaler.bin'))

    # --- learned predictor -------------------------------------------------
    integ_pred, evals, x_end, _ = integrate_env(predictor,
                                                Simpson(),
                                                env,
                                                x_start,
                                                requested_end,
                                                plot=True)
    print('new x1: {}'.format(x_end))
    pred_error = abs(env.fun.integral(x_start, x_end) - integ_pred)
    print('Predictor error: {}'.format(pred_error))
    print('Predictor evals: {}'.format(evals))

    # --- adaptive Simpson with the same evaluation budget ------------------
    env.reset(reset_params=False)
    asr = AdaptSimpsConstEvals(env.fun, x_start, x_end)
    integ_asr = asr(evals)
    asr_error = abs(env.fun.integral(x_start, x_end) - integ_asr)
    print('ASR error: {}'.format(asr_error))
    print('ASR evals: {}'.format(asr.evals))
    asr.plot()

    # --- plain Simpson with the same evaluation budget ---------------------
    env.reset(reset_params=False)
    simps = Simps(env.fun, x_start, x_end)
    integ_simps = simps(num_evals=evals)
    simps_error = abs(env.fun.integral(x_start, x_end) - integ_simps)
    print('Simpson error: {}'.format(simps_error))
    print('Simpson evals: {}'.format(simps.evals))
    simps.plot()
Esempio n. 7
0
def main():
    """Train the Q-value step-size predictor on a Sinus integration task.

    For each episode the environment is reset and stepped until it reports
    ``done``.  At every step the predictor's action values are turned into an
    action, the matching step size is applied with a ``Simpson`` integrator,
    and a training target ``reward + gamma * max(next action values)`` is
    written into the chosen action's slot.  (state, target) pairs are buffered
    in an ``Experience`` object and the predictor is trained on the buffered
    batch whenever the buffer is full or the episode ends.

    Every 100 episodes the ``PerformanceTracker`` evaluates and plots the
    predictor; every 10 episodes the model weights are saved to
    ``'predictor'``.
    """
    gamma = 0.0
    num_episodes = 100000
    # Other step-size grids tried previously:
    #   [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    #   [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    #   [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    dim_state = 3  # nodes per integration step
    dim_action = len(step_sizes)
    memory = 0  # how many integration steps the predictor can look back

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.075,
        error_tol=7.5e-6,
        nodes_per_integ=dim_state,
        memory=memory,
        x0=0,
        max_dist=20,
        step_size_range=(step_sizes[0], step_sizes[-1]),
    )
    experience = Experience(batch_size=32)

    value_model = build_value_model(dim_state=dim_state,
                                    dim_action=dim_action,
                                    filename=None,
                                    lr=0.00001,
                                    memory=memory)
    predictor = PredictorQ(
        step_sizes=step_sizes,
        model=value_model,
        scaler=load('model_quad/model_sinus/Simpson/scaler.bin'),
    )
    # Alternatives used previously: IntegratorLinReg(...) or Boole().
    integrator = Simpson()

    perf_tracker = PerformanceTracker(env, num_testfuns=1000, x0=-1, x1=1)

    for episode in range(num_episodes):
        state = env.reset()
        episode_reward = 0
        episode_loss = 0
        done = False

        # The decaying-epsilon phase is currently switched off: episode is
        # never negative, so this flag is always False.
        in_decay_phase = episode < 0
        if in_decay_phase:
            # decrease from 1.0 to approx 0.2 at episode 300
            eps = 0.2 + 0.8 * 2.71828**(-0.0146068 * episode)
        else:
            eps = 0.66

        print('episode: {}'.format(episode))

        while not done:
            # Ask the predictor for its action values and pick an action.
            action_values = predictor.get_actions(state)
            if in_decay_phase:
                action = choose_action(action_values, eps, dim_action)
            else:
                action = choose_action3(action_values, eps, dim_action)
            step_size = predictor.action_to_stepsize(action)

            # Apply the chosen step size in the environment.
            next_state, reward, done, _ = env.iterate(step_size, integrator)
            episode_reward += reward

            # Build the training target: only the slot of the chosen action
            # is overwritten, the other entries keep the predicted values.
            next_action_values = predictor.get_actions(next_state)
            target = reward + gamma * np.max(next_action_values)
            # NOTE(review): squeeze() may return a view, in which case the
            # assignment below also writes into action_values -- harmless
            # here because action_values is not read again.
            target_actions = action_values.squeeze()
            target_actions[action] = target

            experience.append(state=state, target=target_actions)
            if experience.is_full() or done:
                states, targets = experience.get_samples()
                episode_loss += predictor.train_on_batch(states, targets)
                experience.reset()

            state = next_state

        print('reward: {}'.format(episode_reward))
        print('loss_predictor: {}'.format(episode_loss))

        if episode % 100 == 0:
            perf_tracker.evaluate_performance(predictor, integrator)
            perf_tracker.plot()
            perf_tracker.plot_pareto(num_points=7)

        if episode % 10 == 0:
            predictor.model.save_weights('predictor')
def compare_romberg():
    """Compare the learned predictor with Romberg integration over 100 runs.

    In every run the environment is reset, the predictor integrates from 0.0
    towards 20.0 with a ``Simpson`` integrator, and ``Romberg`` (tolerance
    0.0003, order 2) integrates ``env.fun`` up to the end point the predictor
    actually reached.  The mean step error and the evaluation count of both
    methods are collected; their means and variances over all runs are printed
    at the end.
    """
    x0 = 0.0
    num_samples = 100

    # Alternative grid tried previously:
    #   [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    dim_state = 3
    dim_action = len(step_sizes)

    env = IntegrationEnv(
        fun=Sinus(),
        max_iterations=256,
        initial_step_size=0.15,
        step_sizes=step_sizes,
        error_tol=0.0005,
    )
    value_model = build_value_model(dim_state=dim_state,
                                    dim_action=dim_action,
                                    filename='predictor')
    predictor = PredictorQ(value_model, load('scaler.bin'))

    error_predictor, error_rom = [], []
    evals_predictor, evals_rom = [], []

    for sample_idx in range(num_samples):
        # Progress output every tenth run.
        if sample_idx % 10 == 0:
            print(sample_idx)

        # Learned predictor: integrate_env returns the end point it reached.
        env.reset()
        _, evals, x1, pred_errors = integrate_env(predictor, Simpson(), env,
                                                  x0, 20.0)
        mean_pred_error = np.mean(pred_errors)
        env.reset(reset_params=False)

        # Romberg with a fixed tolerance on the same interval.  (A search that
        # adapted the tolerance until the mean step error fell between 0.0001
        # and 0.0005 was used here previously.)
        rom = Romberg(env.fun, x0, x1, tol=0.0003, order=2)
        _, rom_errors = rom(0.15, True)

        error_predictor.append(mean_pred_error)
        error_rom.append(np.mean(rom_errors))
        evals_predictor.append(evals)
        evals_rom.append(rom.evals)

    averages = [
        ('Avg. predictor number of function evaluations per episode: {}',
         np.mean(evals_predictor)),
        ('Avg. predictor error per step: {}', np.mean(error_predictor)),
        ('Avg. romberg number of function evaluations per episode: {}',
         np.mean(evals_rom)),
        ('Avg. romberg error per step: {}', np.mean(error_rom)),
    ]
    variances = [
        ('Variance of predictor number of function evaluations per episode: {}',
         np.var(evals_predictor)),
        ('Variance of predictor error per step: {}',
         np.var(error_predictor)),
        ('Variance of rom number of function evaluations per episode: {}',
         np.var(evals_rom)),
        ('Variance of rom error per step: {}', np.var(error_rom)),
    ]

    for template, value in averages:
        print(template.format(value))
    print('')
    for template, value in variances:
        print(template.format(value))