def init_env(args, agent = None):
    '''
    Load the precomputed solutions in '../weno_solutions/' and build the
    corresponding training/testing environments.

    The directory must hold an even number of files, consumed two at a time:
    one precise (dense grid) solution whose name contains '-precise', and one
    weno solution computed on the coarse grid. The text before '-precise' is
    parsed as 'a;b;func;c' (func is 'sin' or anything else for cos) and passed
    to construct_init_condition.

    Arg args(python namespace): storing all necessary arguments for the whole
        training procedure. Read here: mode, cfl, precise_dx, T, dx, animation.
    Arg agent(class DDPG/DQN/etc object): forwarded to every Burgers
        environment; needed for RK4 temporal scheme.

    Return: (argss, train_env, test_env).
        argss is a list with one shallow copy of args per *file*; only the
        even indices (one per file pair) are customised and used.
        train_env / test_env are lists of Burgers environments, one per pair.

    Side effects: sets args.num_train and args.num_test; prints progress.

    Raises:
        AssertionError: if the directory holds an odd number of files.
        ValueError: if a pair contains no '-precise' file, or the file name
            cannot be parsed into 'a;b;func;c'.

    version 4 TODO: read different fluxes.
    '''
    solution_dir = '../weno_solutions/'
    # os.listdir returns names in arbitrary order. Sort them so that the two
    # files of one initial condition are adjacent and the pairing is
    # deterministic across platforms and filesystems.
    # NOTE(review): adjacency relies on both files of a pair sharing the same
    # 'a;b;func;c' prefix -- confirm against the script that writes them.
    solution_files = sorted(os.listdir(solution_dir))
    assert len(solution_files) % 2 == 0, \
        'expected (coarse, precise) file pairs in {}'.format(solution_dir)
    train_num = len(solution_files)
    print('train_num: ', train_num)
    argss = [copy.copy(args) for _ in range(train_num)]
    train_env = []
    test_env = []

    ### different mode has different level of training difficulty, thus use different T.
    ### modes not listed here keep the T copied from args.
    mode_to_T = {
        'eno': 0.5,
        'continuous_filter': 0.2,
        'compute_flux': 0.3,
        'weno_coef': 0.3,
        'weno_coef_four': 0.8,
    }

    ### traverse all the solution files and build corresponding training/testing environment
    ### step 2: 1 for precise solution, 1 for weno solution under coarse grid.
    for i in range(0, train_num, 2):
        first, second = solution_files[i], solution_files[i + 1]
        # Identify the precise file by name instead of trusting its position.
        if '-precise' in second:
            weno_solution_file, precise_solution_file = first, second
        elif '-precise' in first:
            weno_solution_file, precise_solution_file = second, first
        else:
            raise ValueError(
                "no '-precise' solution file in pair ({!r}, {!r})".format(first, second))

        print('load file No. ', i // 2)
        print('precise solution file: ', precise_solution_file)
        print('weno coarse solution file: ', weno_solution_file)

        precise_init_end = precise_solution_file.find('-precise')
        init = precise_solution_file[:precise_init_end]
        fields = init.split(';')
        if len(fields) < 4:
            raise ValueError(
                "cannot parse 'a;b;func;c' from file name {!r}".format(precise_solution_file))
        a = float(fields[0])
        b = float(fields[1])
        func = np.sin if fields[2] == 'sin' else np.cos
        c = int(fields[3])
        # Human-readable name of the initial condition (LaTeX-style \pi).
        argss[i].init = '{0} + {1}{2}({3} * \\pi * x)'.format(round(a,3), round(b,3), fields[2], c)
        print(argss[i].init)

        if args.mode in mode_to_T:
            argss[i].T = mode_to_T[args.mode]

        ### training envs evolving steps
        argss[i].dt = argss[i].dx * args.cfl
        precise_num_t = int(argss[i].T / (args.precise_dx * args.cfl)) + 10
        num_t = int(argss[i].T / argss[i].dt) + 10

        ### build the training envs
        init_condition = construct_init_condition(a, b, func, c)
        precise_solution = np.load(solution_dir + precise_solution_file)
        weno_solution = np.load(solution_dir + weno_solution_file)
        train_env.append(Burgers(args = argss[i], init_func=init_condition, agent = agent))
        train_env[-1].precise_weno_solutions = precise_solution[:precise_num_t]
        train_env[-1].weno_coarse_grid = weno_solution[:num_t]

        ### build the test envs: same initial condition, but the number of
        ### stored time steps follows the command line T instead of the mode T.
        # NOTE(review): the test env receives the very same argss[i] object as
        # the training env, so argss[i].T is still the training T -- confirm
        # whether Burgers copies its args.
        precise_num_t = int(args.T / (args.precise_dx * args.cfl)) + 10
        num_t = int(args.T / argss[i].dt) + 10
        test_env.append(Burgers(args = argss[i], init_func=init_condition, agent = agent))
        test_env[-1].precise_weno_solutions = precise_solution[:precise_num_t]
        test_env[-1].weno_coarse_grid = weno_solution[:num_t]

    ### .mp4 storing needs to remove special characters in the file name
    if args.animation:
        # '' drops the character; anything not listed is kept unchanged.
        replacements = {
            ' ': '', '\\': '', '(': '', ')': '',
            '+': 'p', '-': 'm', '*': '_',
        }
        for env in test_env:
            new_init = ''.join(replacements.get(ch, ch) for ch in env.args.init)
            print(new_init)
            env.args.init = new_init

    args.num_train = len(train_env)
    args.num_test = len(test_env)

    return argss, train_env, test_env
if args.forcing: vv['eta'] = 0.01 vv['solution_data_path'] = 'data/local/solutions/9-24-50-eta-0.01-forcing-1' elif args.flux == 'u4': vv['flux'] = 'u4' vv['solution_data_path'] = 'data/local/solutions/9-24-50-u4-eta-0-forcing-0' ran = range(0, 25) # vv['policy_hidden_layers'] = [64, 64, 64, 64, 64, 64] # vv['state_mode'] = 'normalize' ddpg = DDPG(vv, GaussNoise(initial_sig=vv['noise_beg'], final_sig=vv['noise_end'])) agent = ddpg agent.load(osp.join(path, epoch), actor_only=True) # agent.load(osp.join('data/local', '6150'), actor_only=True) env = Burgers(vv, agent=agent) # ptu.set_gpu_mode(True) dx = args.dx if not args.forcing else args.dx * np.pi beg = time.time() num_t = int(0.9 * 10 / dx) if not args.forcing else int(0.9 * np.pi * 10 / dx) for solution_idx in ran: print("solution_idx: ", solution_idx) pre_state = env.reset(solution_idx=solution_idx, num_t=num_t, dx=dx) # pre_state = env.reset(solution_idx=solution_idx, num_t=200) horizon = env.num_t for t in range(1, horizon): # print(t) action = agent.action(
####init_idx, dx, dt ,Tscheme, flux, frame for lines in param: print(lines) data = lines.split(' ') init = init_funcs[int(data[0].replace('\n', ''))] init_name1 = init_name[int(data[0])] exp_args = copy.copy(args) exp_args.T = args.T exp_args.dx = float(data[1]) exp_args.init = init_name1 exp_args.dt = float(data[2]) exp_args.Tscheme = data[3] exp_args.flux = data[4] exp_args.save_RL_weno_animation_path = args.save_figure_path + 'figure/' print(exp_args.save_RL_weno_animation_path) env = Burgers(exp_args, init, agent=agent) env.get_weno_precise() env.get_weno_corase() errors = DDPG_test(agent, [env], exp_args) save_figure(env, int(data[5].replace('\n', ''))) ''' #for a sure dx dt Tscheme give error dx=0.02 dt=dx*args.cfl Tscheme=args.Tscheme rl_error_set=[] coarse_error_set=[] rl_error_set_break=[] coarse_error_set_break=[] for i in range(10): exp_args=copy.copy(args)
for dt in dt_set: RL_error_set = [] coarse_error_set = [] #if dt/dx>args.cfl: # print("{} {} no".format(dx,dt)) # answerfile.write("RL {} {} no no coarse {} {} no no\n".format(dx,dt,dx,dt)) # continue for i in range(init_num): exp_args = copy.copy(args) exp_args.T = args.T exp_args.dx = dx exp_args.dt = dt exp_args.init = init_name[i] exp_args.Tscheme = args.Tscheme env = Burgers(exp_args, init_funcs[i], agent=agent) env.get_weno_precise() env.get_weno_corase() errors = DDPG_test(agent, [env], exp_args) coarse_error = np.zeros(env.num_t) RL_error = np.zeros(env.num_t) for i in range(env.num_t): coarse_error[i] = env.relative_error( env.get_precise_value(i * env.dt), env.weno_coarse_grid[i]) RL_error[i] = env.relative_error( env.get_precise_value(i * env.dt), env.RLgrid[i]) RL_mean_error = np.mean(RL_error) coarse_mean_error = np.mean(coarse_error) RL_error_set.append(RL_mean_error) coarse_error_set.append(coarse_mean_error) RL_error_set = np.array(RL_error_set)
def init_env(args, agent=None):
    '''
    This function initializes and returns the training and test Burgers environments.
    1) hand-set the initial function name, the solution plot y-axis limit, the training env
       grid size dx, evolving time T, and choose the training env idxes.
    2) For each environment, load pre-stored solutions or compute the solutions (test
       environments also store what they compute). The solutions include the precise
       solutions (computed using weno with dense grids), and the weno solutions computed
       under the same grid size as the RL agent.

    ### Arguments:
    args (python namespace variable):
        A namespace variable that stores all necessary parameters for the whole training
        procedure. Read here: num_test, initial_t, T, test, dx, cfl, flux, Tscheme.
    agent (RL agent object, optional):
        A RL agent. It will be passed to the Burgers Env object, and will be used when the
        temporal scheme is RK4.

    ### Return:
    argss (list of python namespace variables):
        A list of 15 shallow copies of args. argss[i] is almost the same as the input args,
        except these domains are modified: .init, .dx, .dt, .T (and the plot limits),
        for training purposes.
    train_env (list of class Burgers objects):
        The Burgers training environments (empty when args.test is set).
    test_env (list of class Burgers objects):
        The Burgers test environments, args.num_test of them.

    ### Side effects:
    Creates '../weno_solutions/' if missing, writes newly computed test solutions there,
    and sets args.num_train.
    '''
    solution_dir = '../weno_solutions/'
    # Cache file names, always spelled WITH the '.npy' extension: np.save appends
    # '.npy' when it is missing, but np.load opens exactly the name it is given.
    precise_path = solution_dir + '{}-precise-{}-{}.npy'
    coarse_path = solution_dir + '{}-coarse-{}-{}-{}-{}.npy'

    argss = [copy.copy(args) for _ in range(15)]

    ### set the name of the initial conditions of each training/test environment.
    init_names = [
        '1;1;cos;6',
        '-1;1;cos;6',
        '-1.5;2;sin;6',
        '1.5;-1.5;sin;6',
        '-1.5;2;cos;6',
        '1.5;-1.5;cos;6',
        'twobreak',
        '0.5_sin2',
        '-1_sin2',
        '-1_2.5cos4',
        '0.2_m2sin4',
        'rarefraction',
    ]
    for env_args, name in zip(argss, init_names):
        env_args.init = name

    ### set the y-axis limit (low, high) when plotting the solution, envs 0..10.
    plot_y_limits = [
        (-2, 3), (-4.5, 1), (-4, 1), (-0.5, 3.5),
        (-5, 1), (-1.5, 5.5), (-1, 3), (-6, 1),
        (-1.5, 3.5), (0, 4), (-2.5, 0.5),
    ]
    for env_args, (y_low, y_high) in zip(argss, plot_y_limits):
        env_args.plot_y_low = y_low
        env_args.plot_y_high = y_high

    train_env = []
    test_env = []

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(solution_dir, exist_ok=True)

    ### set the test environments
    ### version 4 TODO: add the test environment idxes, similar to the train_idxes.
    for i in range(args.num_test):
        env_args = argss[i]
        env_args.initial_t = args.initial_t
        env_args.T = args.T
        if args.test:  ### at testing, set grid size following the command line args
            env_args.dx = args.dx
            env_args.dt = args.dx * args.cfl

        env = Burgers(args=env_args, init_func=init_funcs[i], agent=agent)
        test_env.append(env)

        ### first try to load the pre-stored solutions if there exist, otherwise compute and store the solutions.
        # precise solutions
        precise_file = precise_path.format(env_args.init, args.flux, args.cfl)
        try:
            dense_solutions = np.load(precise_file)
        except FileNotFoundError:
            print('{} build precise weno solutions, flux {} cfl {}'.format(
                env_args.init, args.flux, args.cfl))
            env.get_weno_precise()
            ### version 4 TODO, move the save parts in Burgers Env here.
            np.save(precise_file, env.precise_weno_solutions)
        else:
            precise_num_t = int(env_args.T / (env_args.precise_dx * args.cfl)) + 1
            env.precise_weno_solutions = dense_solutions[:precise_num_t]

        # weno solutions with the same grid size
        coarse_file = coarse_path.format(
            env_args.init, args.Tscheme, args.dx, args.flux, args.cfl)
        try:
            coarse_solutions = np.load(coarse_file)
        except FileNotFoundError:
            print(
                '{} build coarse weno solutions with time scheme {} dx {} flux {} cfl {}'
                .format(env_args.init, args.Tscheme, args.dx, args.flux, args.cfl))
            env.get_weno_corase()
            ### version 4 TODO, move the save parts in Burgers Env here.
            np.save(coarse_file, env.weno_coarse_grid)
        else:
            coarse_num_t = int(env_args.T / (env_args.dx * args.cfl)) + 1
            env.weno_coarse_grid = coarse_solutions[:coarse_num_t]

    ### For training environment, individually set the grid size for each initial condition
    # NOTE(review): argss[i] is the same object already handed to test_env[i];
    # whether these later changes reach the test env depends on Burgers copying
    # its args -- confirm.
    for env_args in argss[0:7]:
        env_args.T = 0.8
    for env_args in argss[7:11]:
        env_args.T = 0.4
    for env_args in argss[0:11]:
        env_args.dx = 0.02
    for i in range(len(init_funcs)):
        argss[i].dt = argss[i].dx * args.cfl

    ### set the training environment
    if not args.test:
        train_idxes = [0, 1, 2, 3, 4, 5]  # hand-choose the training environment idxes.
        for i in train_idxes:
            env_args = argss[i]
            env = Burgers(args=env_args, init_func=init_funcs[i], agent=agent)
            train_env.append(env)

            ### load the solutions if they are pre-computed and stored, otherwise compute them.
            # NOTE(review): a cached file written by the test branch covers
            # args.T; if that is shorter than the training T the slice below is
            # silently shorter than precise_num_t / coarse_num_t -- confirm.
            try:
                dense_solutions = np.load(
                    precise_path.format(env_args.init, args.flux, args.cfl))
            except FileNotFoundError:
                env.get_weno_precise()
            else:
                precise_num_t = int(env_args.T / (env_args.precise_dx * args.cfl)) + 1
                env.precise_weno_solutions = dense_solutions[:precise_num_t]

            # Fix: the cache name now ends in '.npy' (it never matched before,
            # so the coarse solution was always recomputed) and is keyed on the
            # training grid size env_args.dx rather than the command line dx,
            # so a solution computed on a different grid is never loaded.
            try:
                coarse_solutions = np.load(
                    coarse_path.format(env_args.init, args.Tscheme, env_args.dx,
                                       args.flux, args.cfl))
            except FileNotFoundError:
                env.get_weno_corase()
            else:
                coarse_num_t = int(env_args.T / (env_args.dx * args.cfl)) + 1
                env.weno_coarse_grid = coarse_solutions[:coarse_num_t]

    args.num_train = len(train_env)
    return argss, train_env, test_env
def run_task(vv, log_dir, exp_name):
    """Train a DDPG agent on the Burgers environment described by ``vv``.

    Steps, in order: enable GPU mode when CUDA is available, dump ``vv`` to
    ``variant.json`` in the logger directory, optionally replace ``vv`` with
    the variant stored next to a previous run (``vv['load_path']``), seed
    torch / numpy / random, build the agent (loading its weights when
    resuming), build the environment and start training.

    Args:
        vv: variant dictionary; keys read here are ``load_path``,
            ``solution_data_path``, ``dx``, ``test_interval``, ``load_epoch``,
            ``seed``, ``noise_beg`` and ``noise_end``.
        log_dir: directory handed to ``logger.configure``.
        exp_name: experiment name handed to ``logger.configure``.
    """
    # Imports stay inside the function and in their original order: they are
    # project modules and may have import-time side effects.
    import torch
    import numpy as np
    import copy
    import os, sys
    import time
    import math
    import random
    import json
    from get_args import get_args
    from DDPG.train_util import DDPG_train, DDPG_test
    from DDPG.DDPG_new import DDPG
    from DDPG.util import GaussNoise
    from chester import logger
    from BurgersEnv.Burgers import Burgers
    import utils.ptu as ptu

    if torch.cuda.is_available():
        ptu.set_gpu_mode(True)

    # Record the variant exactly as it was passed in.
    logger.configure(dir=log_dir, exp_name=exp_name)
    variant_dump = os.path.join(logger.get_dir(), 'variant.json')
    with open(variant_dump, 'w') as out_file:
        json.dump(vv, out_file, indent=2, sort_keys=True)

    # When resuming, the stored variant replaces vv; a few values of the
    # current invocation are carried over into it.
    ddpg_load_epoch = None
    if vv['load_path'] is not None:
        current_solution_path = vv['solution_data_path']
        current_dx = vv['dx']
        current_test_interval = vv['test_interval']
        load_path = os.path.join('data/local', vv['load_path'])
        ddpg_load_epoch = str(vv['load_epoch'])

        stored_variant = os.path.join(load_path, 'variant.json')
        with open(stored_variant, 'r') as in_file:
            vv = json.load(in_file)

        vv['noise_beg'] = 0.1
        vv['solution_data_path'] = current_solution_path
        vv['test_interval'] = current_test_interval
        # The stored dx wins; the current one is only a fallback.
        if vv.get('dx') is None:
            vv['dx'] = current_dx

    # Seed everything for reproducibility.
    run_seed = vv['seed']
    torch.manual_seed(run_seed)
    torch.cuda.manual_seed_all(run_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(run_seed)
    random.seed(run_seed)

    # Agent with Gaussian exploration noise.
    exploration_noise = GaussNoise(initial_sig=vv['noise_beg'],
                                   final_sig=vv['noise_end'])
    agent = DDPG(vv, exploration_noise)
    if ddpg_load_epoch is not None:
        checkpoint = os.path.join(load_path, ddpg_load_epoch)
        print("load ddpg models from {}".format(checkpoint))
        agent.load(checkpoint)

    # Environment, then training.
    env = Burgers(vv, agent=agent)
    print('begining training!')
    DDPG_train(vv, env, agent)