log_interval_policy = 10
exp_name = 'PILCO'
num_exp = 1
log_name = 'train._{}_lrp{}_drop{}-EXP_{}_GRU'.format(
    exp_name, args.lr_policy, args.drop_p, num_exp)
num_iter_algo = args.num_iter_algo
num_iter_policy = args.num_iter_policy
grad_clip = 1
K = 20

# Create log files
log_dir = utils.configure_log_dir(env_name, txt=log_name, No_time=False)
logger = utils.Logger(log_dir, csvname='log_loss')
logging_output(log_dir)

# Save args parameters
with open(log_dir + '/info.txt', 'wt') as f:
    print('Hello World!\n', file=f)
    print(args, file=f)

# Set up environment
env = gym.make(env_name)

# Create dynamics model
dynamics = BNN3(env,
                hidden_size=[hidden_size] * num_hidden_layers,
                drop_prob=drop_p,
                activation=net_activation,
                shaping_state_delta=shaping_state_delta).cuda()
dynamics_optimizer = torch.optim.Adam(dynamics.parameters(),
                                      lr=lr_dynamics,
                                      weight_decay=dyn_reg2)
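# --- Sketch (not part of the original script) ---
# The block above reads args.lr_policy, args.drop_p, args.num_iter_algo and
# args.num_iter_policy, and the policy-training block below reads args.exp_num.
# A minimal argparse setup that would supply those fields could look like the
# sketch here; only the flag names come from the code above, the types,
# defaults and help strings are assumptions.
import argparse

parser = argparse.ArgumentParser(description='PILCO-style dynamics/policy training')
parser.add_argument('--lr_policy', type=float, default=1e-3,
                    help='policy optimizer learning rate (assumed default)')
parser.add_argument('--drop_p', type=float, default=0.1,
                    help='dropout probability of the BNN dynamics model (assumed default)')
parser.add_argument('--num_iter_algo', type=int, default=20,
                    help='outer algorithm iterations (assumed default)')
parser.add_argument('--num_iter_policy', type=int, default=1000,
                    help='policy optimization steps per iteration (assumed default)')
parser.add_argument('--exp_num', type=int, default=1,
                    help='experiment index used in the log name (assumed default)')
args = parser.parse_args()
# --- End of sketch ---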
# Hyperparameter check
if dyn_batch_size > max_timestep:
    log.error('Hyper param error: dyn_batch_size must not be more than max_timestep.')
    raise AssertionError('dyn_batch_size must not be more than max_timestep.')

# Experiment parameters
log_interval_policy = 100
exp_name = 'Train_policy'
num_exp = args.exp_num
log_name = 'train._{}_drop{}_nrd{}-EXP_{}'.format(exp_name, drop_p, n_rnd, num_exp)

# Create log files
log_dir = utils.configure_log_dir(env_name, txt=log_name, No_time=False,
                                  log_group=exp_group_dir)
logger = utils.Logger(log_dir, csvname='log')
logging_output(log_dir)

# Save args parameters
with open(log_dir + '/info.txt', 'wt') as f:
    print('Hello World!\n', file=f)
    print(args, file=f)

# Set up environment
env = gym.make(env_name)

# Create dynamics model
dynamics = BNN3(env, hidden_size=[hidden_size] * num_hidden_layers,