# Third-party imports used throughout this file. `load` is assumed to be
# joblib's, since the scripts deserialize .bin scaler/model files. The
# project-level names (IntegrationEnv, PredictorQ, Estimator, the integrators,
# the test functions, ...) come from this repository's own modules and are
# left unimported here. Note the repeated `main`/`one_fun` definitions below,
# which suggest this file concatenates several separate scripts.
import math

import numpy as np
import matplotlib.pyplot as plt
from joblib import load


def main():
    # step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    # step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    dim_state = 3
    dim_action = len(step_sizes)
    memory = 1
    env = IntegrationEnv(fun=Sinus(), max_iterations=256, initial_step_size=0.075,
                         error_tol=7.5e-6, nodes_per_integ=dim_state, memory=memory,
                         x0=0, max_dist=20,
                         step_size_range=(step_sizes[0], step_sizes[-1]))
    scaler = load('scaler.bin')
    predictor = PredictorQ(
        step_sizes=step_sizes,
        model=build_value_model(dim_state=dim_state, dim_action=dim_action,
                                filename=None, lr=0.00001, memory=memory),
        scaler=load('model_quad/model_sinus/Simpson/scaler.bin'))
    integrator = Simpson()
    estimator = Estimator(
        build_estimator_model(dim_state, lr=0.0001, filename='estimator'),
        scaler, threshold=100 * 7.5e-6)
    train_model(estimator, env, predictor, integrator, 5000, scaler)

def main():
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    dim_state = 3
    dim_action = len(step_sizes)
    env = IntegrationEnv(fun=Sinus(), max_iterations=256, initial_step_size=0.1,
                         step_sizes=step_sizes, error_tol=0.0005,
                         nodes_per_integ=dim_state)
    num_episodes = 500
    # one constant-step-size baseline per action, plus the trained Q-predictor
    predictors = [PredictorConst(i) for i in range(dim_action)]
    predictors.append(
        PredictorQ(
            build_value_model(dim_state=dim_state, dim_action=dim_action,
                              filename='predictor'),
            load('scaler.bin')))
    scores = benchmark(predictors,
                       IntegratorLinReg(step_sizes, load('linreg_models.bin'),
                                        load('scaler.bin')),
                       num_episodes, env)
    # scores = benchmark(predictors, Simpson(), num_episodes, env)
    print(scores / num_episodes)

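# `benchmark` is not defined in this file. A minimal sketch of what such a
# function could look like, assuming it accumulates one score per predictor
# over the episodes (the caller above divides by num_episodes); the project's
# real scoring metric may well differ, so this is purely illustrative.
def benchmark_sketch(predictors, integrator, num_episodes, env):
    """Hypothetical: sum a per-episode score (here: mean per-step error)
    for each predictor over num_episodes randomly drawn test functions."""
    scores = np.zeros(len(predictors))
    for _ in range(num_episodes):
        env.reset()  # draw a new random function
        for i, predictor in enumerate(predictors):
            env.reset(reset_params=False)  # same function for every predictor
            _, _, _, errors = integrate_env(predictor, integrator, env, 0.0, 20.0)
            scores[i] += np.mean(errors)
    return scores
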
def one_fun_boole():
    x0 = 0
    x1 = 10
    step_sizes = [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    # step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    dim_state = 5
    dim_action = len(step_sizes)
    memory = 1
    env = IntegrationEnv(fun=Sinus(), max_iterations=256, initial_step_size=0.1,
                         step_sizes=step_sizes, error_tol=0.000001, memory=memory,
                         nodes_per_integ=dim_state)
    predictor = PredictorQ(
        build_value_model(dim_state=dim_state, dim_action=dim_action,
                          filename='predictor', memory=memory),
        load('scaler_boole_mem1.bin'))
    # integrator = IntegratorLinReg(step_sizes, load('linreg_models.bin'), load('scaler.bin'))
    integrator = Boole()

    _, evals, x1, errors = integrate_env(predictor, integrator, env, x0, x1,
                                         plot=True)
    print('new x1: {}'.format(x1))
    print('Predictor error total: {}'.format(np.sum(errors)))
    print('Predictor error per step: {}'.format(np.mean(errors)))
    print('Predictor evals: {}'.format(evals))
    print('')

    env.reset(reset_params=False)
    booles = BoolesRule(env.fun, x0, x1)
    integ_boole, errors = booles(num_evals=evals, stepwise_error=True)
    print('Boole error total: {}'.format(np.sum(errors)))
    print('Boole error per step: {}'.format(np.mean(errors)))
    print('Boole evals: {}'.format(booles.evals))
    print('')
    booles.plot()

    env.reset(reset_params=False)
    simps = Simps(env.fun, x0, x1)
    integ_simps, errors = simps(num_evals=evals, stepwise_error=True)
    print('Simpson error total: {}'.format(np.sum(errors)))
    print('Simpson error per step: {}'.format(np.mean(errors)))
    print('Simpson evals: {}'.format(simps.evals))
    print('')
    simps.plot()

    env.reset(reset_params=False)
    rom = Romberg(env.fun, x0, x1, tol=0.0005, order=3)
    integ, errors = rom(0.15, stepwise_errors=True)
    print('Romberg error total: {}'.format(np.sum(errors)))
    print('Romberg error per step: {}'.format(np.mean(errors)))
    print('Romberg evals: {}'.format(rom.evals))
    rom.plot()

def visualize_predictor():
    step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    dim_state = 3
    dim_action = len(step_sizes)
    predictor = PredictorQ(
        step_sizes=step_sizes,
        model=build_value_model(dim_state=dim_state, dim_action=dim_action,
                                filename='predictor', lr=0.0001),
        scaler=load('scaler.bin'))
    predictor.visualize([0.1, (-1.5, 1.5), (-1.5, 1.5)], step_sizes, flat=True)

def pareto_model():
    x0 = -1
    x1 = 1
    num_samples = 500
    memory = 1
    # step_sizes = [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    # step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    # step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    dim_state = 3
    dim_action = len(step_sizes)
    env = IntegrationEnv(fun=BrokenPolynomial(), max_iterations=1000,
                         initial_step_size=0.075, step_sizes=step_sizes,
                         error_tol=0.0005, memory=memory,
                         nodes_per_integ=dim_state)
    predictor = PredictorQ(
        build_value_model(dim_state=dim_state, dim_action=dim_action,
                          filename='predictor', memory=memory),
        load('scaler_mem1.bin'))
    # integrator = IntegratorLinReg(step_sizes, load('linreg_models_estim.bin'))
    integrator = Simpson()
    # estimator = Estimator(build_estimator_model(dim_state, lr=0.0001, filename='estimator'),
    #                       load('scaler.bin'), threshold=100 * 7.5e-6)

    errors = []
    steps = []
    x1s = []
    for i in range(num_samples):
        if i % 10 == 0:
            print(i)
        env.reset()
        _, evals, this_x1, err = integrate_env(predictor, integrator, env, x0, x1)
        errors.append(np.mean(err))
        steps.append(evals)
        x1s.append(this_x1)

    print(np.mean(steps))
    print(np.mean(errors))
    print(np.mean(x1s))
    print(np.quantile(errors, 0.9))  # 90th percentile of the per-step errors
    # store (mean error, mean evals) as one point for a Pareto comparison
    np.save('pareto_model_mem1.npy', np.array([np.mean(errors), np.mean(steps)]))
    plt.hist(errors, 25)
    plt.show()

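# pareto_model() only saves one (mean error, mean evals) pair per run. A
# minimal sketch of how several such .npy files could be combined into a
# Pareto plot; the file-name argument is hypothetical, not something this
# repository produces by itself.
def plot_pareto_points_sketch(filenames):
    """Hypothetical: scatter mean evals vs. mean error, one point per file
    saved by pareto_model()."""
    for name in filenames:
        mean_error, mean_steps = np.load(name)
        plt.scatter(mean_steps, mean_error, label=name)
    plt.xlabel('mean function evaluations per episode')
    plt.ylabel('mean error per step')
    plt.legend()
    plt.show()
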
def compare():
    """Compare the predictor to the adaptive Simpson rule via the average
    error over a sample of functions."""
    x0 = 0.0
    num_samples = 200
    error_predictor = 0.0
    error_simpson = 0.0
    step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    dim_state = 3
    dim_action = len(step_sizes)
    env = IntegrationEnv(fun=SuperposeSinus(5), max_iterations=256,
                         initial_step_size=0.2, step_sizes=step_sizes,
                         error_tol=0.0005)
    predictor = PredictorQ(
        build_value_model(dim_state=dim_state, dim_action=dim_action,
                          filename='predictor'),
        load('scaler.bin'))

    for i in range(num_samples):
        if i % 10 == 0:
            print(i)
        x1 = 200.0
        env.reset()
        integ_pred, evals, x1, _ = integrate_env(predictor, Simpson(), env, x0, x1)
        integ = env.fun.integral(x0, x1)  # exact reference value
        env.reset(reset_params=False)
        # asr = AdaptSimpsConstEvals(env.fun, x0, x1)
        # integ_simps = asr(evals)
        asr = Simps(env.fun, x0, x1)
        integ_simps = asr(num_evals=evals)
        error_predictor += abs(integ - integ_pred)
        error_simpson += abs(integ - integ_simps)

    error_simpson /= num_samples
    error_predictor /= num_samples
    print('Predictor error: {}'.format(error_predictor))
    print('ASR error: {}'.format(error_simpson))

def one_fun():
    x0 = 0.0
    x1 = 10.0
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    dim_state = 3
    dim_action = len(step_sizes)
    env = IntegrationEnv(fun=Sinus(), max_iterations=256, initial_step_size=0.15,
                         step_sizes=step_sizes, error_tol=0.0005)
    predictor = PredictorQ(
        build_value_model(dim_state=dim_state, dim_action=dim_action,
                          filename='predictor'),
        load('scaler.bin'))

    integ, evals, x1, _ = integrate_env(predictor, Simpson(), env, x0, x1,
                                        plot=True)
    print('new x1: {}'.format(x1))
    print('Predictor error: {}'.format(abs(env.fun.integral(x0, x1) - integ)))
    print('Predictor evals: {}'.format(evals))

    env.reset(reset_params=False)
    asr = AdaptSimpsConstEvals(env.fun, x0, x1)
    integ = asr(evals)
    print('ASR error: {}'.format(abs(env.fun.integral(x0, x1) - integ)))
    print('ASR evals: {}'.format(asr.evals))
    asr.plot()

    env.reset(reset_params=False)
    simps = Simps(env.fun, x0, x1)
    integ_simps = simps(num_evals=evals)
    print('Simpson error: {}'.format(abs(env.fun.integral(x0, x1) - integ_simps)))
    print('Simpson evals: {}'.format(simps.evals))
    simps.plot()

def main():
    gamma = 0.0
    num_episodes = 100000
    # step_sizes = [0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.25, 0.4]
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    # step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    # step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    dim_state = 3  # nodes per integration step
    dim_action = len(step_sizes)
    memory = 0  # how many integration steps the predictor can look back

    env = IntegrationEnv(fun=Sinus(), max_iterations=256, initial_step_size=0.075,
                         error_tol=7.5e-6, nodes_per_integ=dim_state, memory=memory,
                         x0=0, max_dist=20,
                         step_size_range=(step_sizes[0], step_sizes[-1]))
    # env = IntegrationEnv(fun=Sinus(), max_iterations=128, initial_step_size=0.1,
    #                      step_sizes=step_sizes, error_tol=0.0005,
    #                      nodes_per_integ=dim_state, memory=memory)
    experience = Experience(batch_size=32)
    predictor = PredictorQ(
        step_sizes=step_sizes,
        model=build_value_model(dim_state=dim_state, dim_action=dim_action,
                                filename=None, lr=0.00001, memory=memory),
        scaler=load('model_quad/model_sinus/Simpson/scaler.bin'))
    # integrator = IntegratorLinReg(step_sizes, load('linreg_models.bin'), load('scaler.bin'))
    # integrator = Boole()
    integrator = Simpson()
    perf_tracker = PerformanceTracker(env, num_testfuns=1000, x0=-1, x1=1)
    # losses = []
    # moving_average = []

    for episode in range(num_episodes):
        state = env.reset()
        reward_total = 0
        loss_this_episode = 0
        steps = 0
        done = False
        eps = 0.66
        if episode < 0:  # disabled exploration schedule (condition never holds)
            # eps = 0.01 + (1.0 - 0.01) * math.exp(-0.023 * episode)
            # decrease from 1.0 to approx 0.2 at episode 300
            eps = 0.2 + 0.8 * math.exp(-0.0146068 * episode)
        print('episode: {}'.format(episode))

        while not done:
            # get action (step size) from the actor, epsilon-greedily
            actions = predictor.get_actions(state)
            if episode < 0:
                action = choose_action(actions, eps, dim_action)
            else:
                action = choose_action3(actions, eps, dim_action)
            step_size = predictor.action_to_stepsize(action)

            # execute action
            next_state, reward, done, _ = env.iterate(step_size, integrator)
            steps += 1
            reward_total += reward

            # learning: Q-target is r + gamma * max_a' Q(s', a')
            action_next_state = predictor.get_actions(next_state)
            target = reward + gamma * np.max(action_next_state)
            target_actions = actions.squeeze()
            target_actions[action] = target
            experience.append(state=state, target=target_actions)
            if experience.is_full() or done:
                states, targets = experience.get_samples()
                loss_predictor = predictor.train_on_batch(states, targets)
                loss_this_episode += loss_predictor
                experience.reset()
            state = next_state

        print('reward: {}'.format(reward_total))
        print('loss_predictor: {}'.format(loss_this_episode))
        # losses.append(loss_this_episode)
        # if episode % 10 == 0 and len(losses) > 99:
        #     moving_average.append(np.mean(losses[-100:]))
        #     plt.plot(moving_average, 'r')
        #     plt.pause(0.05)
        if episode % 100 == 0:
            perf_tracker.evaluate_performance(predictor, integrator)
            perf_tracker.plot()
            perf_tracker.plot_pareto(num_points=7)
        # if episode % 250 == 0:
        #     env.plot(episode=episode, x_min=-1.5, x_max=1.5)
        if episode % 10 == 0:
            predictor.model.save_weights('predictor')

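# `choose_action` / `choose_action3` are used by the training loop above but
# not defined in this file. A minimal epsilon-greedy sketch of what such a
# helper could look like, assuming `actions` holds one Q-value per discrete
# step size; the project's real helpers may differ (choose_action3 might, for
# instance, restrict exploration to step sizes near the greedy one).
def choose_action_sketch(actions, eps, dim_action):
    """Hypothetical epsilon-greedy selection: a uniformly random action with
    probability eps, otherwise the arg-max of the predicted Q-values."""
    if np.random.rand() < eps:
        return np.random.randint(dim_action)
    return int(np.argmax(actions))
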
def compare_romberg():
    x0 = 0.0
    num_samples = 100
    error_predictor = []
    error_rom = []
    evals_rom = []
    evals_predictor = []
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    # step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    dim_state = 3
    dim_action = len(step_sizes)
    env = IntegrationEnv(fun=Sinus(), max_iterations=256, initial_step_size=0.15,
                         step_sizes=step_sizes, error_tol=0.0005)
    predictor = PredictorQ(
        build_value_model(dim_state=dim_state, dim_action=dim_action,
                          filename='predictor'),
        load('scaler.bin'))

    for i in range(num_samples):
        if i % 10 == 0:
            print(i)
        x1 = 20.0

        # model
        env.reset()
        _, evals, x1, errors = integrate_env(predictor, Simpson(), env, x0, x1)
        error_pred_step = np.mean(errors)
        env.reset(reset_params=False)

        # romberg
        tol = 0.0003
        rom = Romberg(env.fun, x0, x1, tol=tol, order=2)
        integ_rom, errors = rom(0.15, True)
        error_rom_step = np.mean(errors)
        evals_rom_step = rom.evals
        # for j in range(10):
        #     rom = Romberg(env.fun, x0, x1, tol=tol, order=2)
        #     integ_rom, errors = rom(0.15, True)
        #     error_rom_step = np.mean(errors)
        #     evals_rom_step = rom.evals
        #     if error_rom_step < 0.0001:
        #         tol *= 2.0
        #     elif error_rom_step > 0.0005:
        #         tol /= 3.0
        #     else:
        #         break

        error_predictor.append(error_pred_step)
        error_rom.append(error_rom_step)
        evals_predictor.append(evals)
        evals_rom.append(evals_rom_step)

    # error_rom = np.array(error_rom)
    # not_converged = np.concatenate((error_rom[error_rom > 0.0005],
    #                                 error_rom[error_rom < 0.0001]))
    # if len(not_converged) > 0:
    #     print('romberg did not converge in some cases:')
    #     print(not_converged)

    mean_error_predictor = np.mean(error_predictor)
    var_error_predictor = np.var(error_predictor)
    mean_error_rom = np.mean(error_rom)
    var_error_rom = np.var(error_rom)
    mean_evals_predictor = np.mean(evals_predictor)
    var_evals_predictor = np.var(evals_predictor)
    mean_evals_rom = np.mean(evals_rom)
    var_evals_rom = np.var(evals_rom)
    print('Avg. predictor number of function evaluations per episode: {}'.format(
        mean_evals_predictor))
    print('Avg. predictor error per step: {}'.format(mean_error_predictor))
    print('Avg. romberg number of function evaluations per episode: {}'.format(
        mean_evals_rom))
    print('Avg. romberg error per step: {}'.format(mean_error_rom))
    print('')
    print('Variance of predictor number of function evaluations per episode: {}'.format(
        var_evals_predictor))
    print('Variance of predictor error per step: {}'.format(var_error_predictor))
    print('Variance of rom number of function evaluations per episode: {}'.format(
        var_evals_rom))
    print('Variance of rom error per step: {}'.format(var_error_rom))

def one_fun():
    x0 = -1
    x1 = 1
    # step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    # step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    step_sizes = [0.05, 0.075, 0.1, 0.125, 0.15, 0.2, 0.3, 0.67]
    dim_state = 3
    dim_action = len(step_sizes)
    memory = 1
    env = IntegrationEnv(fun=BrokenPolynomial(), max_iterations=256,
                         initial_step_size=0.075, step_sizes=step_sizes,
                         error_tol=7.5e-6, memory=memory)
    predictor = PredictorQ(
        build_value_model(dim_state=dim_state, dim_action=dim_action,
                          filename='predictor', memory=memory),
        load('scaler_mem1.bin'))
    # integrator = IntegratorLinReg(step_sizes, load('linreg_models.bin'), load('scaler.bin'))
    integrator = Simpson()

    # _, evals, _, errors = integrate_env(predictor, Simpson(), env, x0, x1, plot=True)
    # print('new x1: {}'.format(x1))
    # print('Predictor error total: {}'.format(np.sum(errors)))
    # print('Predictor error per step: {}'.format(np.mean(errors)))
    # print('Predictor evals: {}'.format(evals))
    # print('')

    env.reset(reset_params=False)
    _, evals, x1, errors = integrate_env(predictor, integrator, env, x0, x1,
                                         plot=True)
    print('new x1: {}'.format(x1))
    print('Predictor error total: {}'.format(np.sum(errors)))
    print('Predictor error per step: {}'.format(np.mean(errors)))
    print('Predictor evals: {}'.format(evals))
    print('')

    env.reset(reset_params=False)
    asr = AdaptSimpsConstEvals(env.fun, x0, x1)
    asr(200)  # fixed budget of 200 evaluations
    errors = asr.stepwise_errors
    print('ASR error total: {}'.format(np.sum(errors)))
    print('ASR error per step: {}'.format(np.mean(errors)))
    print('ASR evals: {}'.format(asr.evals))
    print('')
    asr.plot()

    env.reset(reset_params=False)
    simps = Simps(env.fun, x0, x1)
    integ_simps, errors = simps(num_evals=evals, stepwise_error=True)
    # integ_simps, errors = simps(step_size=0.11, stepwise_error=True)
    print('Simpson error total: {}'.format(np.sum(errors)))
    print('Simpson error per step: {}'.format(np.mean(errors)))
    print('Simpson evals: {}'.format(simps.evals))
    print('')
    simps.plot()

    env.reset(reset_params=False)
    rom = Romberg(env.fun, x0, x1, tol=0.0005, order=2)
    integ, errors = rom(0.15, stepwise_errors=True)
    print('Romberg error total: {}'.format(np.sum(errors)))
    print('Romberg error per step: {}'.format(np.mean(errors)))
    print('Romberg evals: {}'.format(rom.evals))
    rom.plot()

def compare_simps_tol():
    """Compare the model to the Simpson rule, where the Simpson step size is
    chosen so that its error matches the tolerance."""
    x0 = 0.0
    num_samples = 2000
    error_predictor = []
    error_simps = []
    evals_simps = []
    evals_predictor = []
    step_sizes = [0.05, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75]
    # step_sizes = [0.05, 0.1, 0.125, 0.15, 0.175, 0.2, 0.225, 0.25, 0.3, 0.4]
    dim_state = 3
    dim_action = len(step_sizes)
    env = IntegrationEnv(fun=SuperposeSinus(5), max_iterations=256,
                         initial_step_size=0.15, step_sizes=step_sizes,
                         error_tol=0.0005)
    predictor = PredictorQ(
        build_value_model(dim_state=dim_state, dim_action=dim_action,
                          filename='predictor'),
        load('scaler.bin'))

    for i in range(num_samples):
        if i % 10 == 0:
            print(i)
        x1 = 20.0

        # model
        env.reset()
        _, evals, x1, errors = integrate_env(predictor, Simpson(), env, x0, x1)
        error_pred_step = np.mean(errors)
        env.reset(reset_params=False)

        # simpson
        step_size = 0.177
        simp = Simps(env.fun, x0, x1)
        _, error_simps_step = simp(step_size=step_size, stepwise_error=True)
        error_simps_step = np.mean(error_simps_step)
        evals_simps_step = simp.evals
        # for j in range(10):
        #     simp = Simps(env.fun, x0, x1)
        #     integ_rom, error_simps_step = simp(step_size=step_size, stepwise_error=True)
        #     error_simps_step /= (simp.evals - 1.0) / 2.0
        #     evals_simps_step = simp.evals
        #     if error_simps_step < 0.0001:
        #         step_size *= 1.5
        #     elif error_simps_step > 0.0005:
        #         step_size /= 2.0
        #     else:
        #         break

        error_predictor.append(error_pred_step)
        error_simps.append(error_simps_step)
        evals_predictor.append(evals)
        evals_simps.append(evals_simps_step)

    error_simps = np.array(error_simps)
    not_converged = np.concatenate(
        (error_simps[error_simps > 0.0005], error_simps[error_simps < 0.0001]))
    if len(not_converged) > 0:
        print('simps did not converge in some cases:')
        print(not_converged)

    mean_error_predictor = np.mean(error_predictor)
    var_error_predictor = np.var(error_predictor)
    mean_error_simps = np.mean(error_simps)
    var_error_simps = np.var(error_simps)
    mean_evals_predictor = np.mean(evals_predictor)
    var_evals_predictor = np.var(evals_predictor)
    mean_evals_simps = np.mean(evals_simps)
    var_evals_simps = np.var(evals_simps)
    print('Avg. predictor number of function evaluations per episode: {}'.format(
        mean_evals_predictor))
    print('Avg. predictor error per step: {}'.format(mean_error_predictor))
    print('Avg. simpson number of function evaluations per episode: {}'.format(
        mean_evals_simps))
    print('Avg. simpson error per step: {}'.format(mean_error_simps))
    print('')
    print('Variance of predictor number of function evaluations per episode: {}'.format(
        var_evals_predictor))
    print('Variance of predictor error per step: {}'.format(var_error_predictor))
    print('Variance of simpson number of function evaluations per episode: {}'.format(
        var_evals_simps))
    print('Variance of simpson error per step: {}'.format(var_error_simps))

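# compare_romberg() and compare_simps_tol() duplicate their mean/variance
# reporting. A small sketch of a shared helper that both could call instead;
# this is a suggested refactoring, not existing project code.
def report_stats_sketch(name, errors, evals):
    """Hypothetical: print mean and variance of per-step errors and of the
    number of function evaluations for one method."""
    print('Avg. {} number of function evaluations per episode: {}'.format(
        name, np.mean(evals)))
    print('Avg. {} error per step: {}'.format(name, np.mean(errors)))
    print('Variance of {} number of function evaluations per episode: {}'.format(
        name, np.var(evals)))
    print('Variance of {} error per step: {}'.format(name, np.var(errors)))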