log_interval_policy = 10
exp_name = 'PILCO'
num_exp = 1
log_name = 'train._{}_lrp{}_drop{}-EXP_{}_GRU'.format(exp_name, args.lr_policy,
                                                      args.drop_p, num_exp)
num_iter_algo = args.num_iter_algo

num_iter_policy = args.num_iter_policy
grad_clip = 1

K = 20

# Create log files
log_dir = utils.configure_log_dir(env_name, txt=log_name, No_time=False)
logger = utils.Logger(log_dir, csvname='log_loss')

logging_output(log_dir)
# Save args parameters
with open(log_dir + '/info.txt', 'wt') as f:
    print('Hello World!\n', file=f)
    print(args, file=f)
 
# Set up environment
env = gym.make(env_name)

# Create dynamics model
dynamics = BNN3(env, hidden_size=[hidden_size] * num_hidden_layers, drop_prob=drop_p,
                activation=net_activation, shaping_state_delta=shaping_state_delta).cuda()
dynamics_optimizer = torch.optim.Adam(dynamics.parameters(), lr=lr_dynamics,
                                      weight_decay=dyn_reg2)
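
# Illustrative sketch (not part of the original script): a typical single
# dynamics-model update using the optimizer and grad_clip defined above.
# BNN3's forward signature and training loss are not known here, so
# `inputs`, `targets`, the call `dynamics(inputs)`, and the use of mse_loss
# are hypothetical placeholders.
import torch.nn.functional as F

def dynamics_update(inputs, targets):
    """One hypothetical gradient step on the dynamics model."""
    dynamics_optimizer.zero_grad()
    pred = dynamics(inputs)                 # hypothetical forward pass
    loss = F.mse_loss(pred, targets)        # hypothetical regression loss
    loss.backward()
    # Clip gradients to the grad_clip value defined earlier in this script.
    torch.nn.utils.clip_grad_norm_(dynamics.parameters(), grad_clip)
    dynamics_optimizer.step()
    return loss.item()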
Example #2
# Param check
if dyn_batch_size > max_timestep:
    log.error('Hyper param error: dyn_batch_size must not be more than max_timestep.')
    raise ValueError('dyn_batch_size must not be more than max_timestep.')

# Exp parameters
log_interval_policy = 100
exp_name = 'Train_policy'
num_exp = args.exp_num
log_name = 'train._{}_drop{}_nrd{}-EXP_{}'.format(exp_name, drop_p, n_rnd,
                                                   num_exp)

# Create log files
log_dir = utils.configure_log_dir(env_name,
                                  txt=log_name,
                                  No_time=False,
                                  log_group=exp_group_dir)
logger = utils.Logger(log_dir, csvname='log')
logging_output(log_dir)

# Save args parameters
with open(log_dir + '/info.txt', 'wt') as f:
    print('Hello World!\n', file=f)
    print(args, file=f)

# Set up environment
env = gym.make(env_name)
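
# Illustrative sketch (not part of the original script): collecting a few
# random rollouts from the env as an initial dataset for the dynamics model.
# This assumes the classic gym API (reset() -> obs, step() -> 4-tuple); the
# variables `n_rnd` and `max_timestep` come from the surrounding script, and
# the (state, action, next_state) tuple layout is a hypothetical choice.
def collect_random_rollouts(env, n_rollouts, horizon):
    """Roll out a uniform-random policy and return raw transitions."""
    transitions = []
    for _ in range(n_rollouts):
        obs = env.reset()
        for _ in range(horizon):
            action = env.action_space.sample()          # uniform-random action
            next_obs, reward, done, _ = env.step(action)
            transitions.append((obs, action, next_obs))
            obs = next_obs
            if done:
                break
    return transitions

# Hypothetical usage: data = collect_random_rollouts(env, n_rnd, max_timestep)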

# Create dynamics model
dynamics = BNN3(env,
                hidden_size=[hidden_size] * num_hidden_layers,