Example #1
    parser.add_argument('--resume',
                        default='default',
                        type=str,
                        help='Resuming model path for testing')
    # parser.add_argument('--l2norm', default=0.01, type=float, help='l2 weight decay') # TODO
    # parser.add_argument('--cuda', dest='cuda', action='store_true') # TODO

    args = parser.parse_args()
    args.output = get_output_folder(args.output, args.env)
    if args.resume == 'default':
        args.resume = 'output/{}-run0'.format(args.env)

    env = NormalizedEnv(gym.make(args.env))

    if args.seed > 0:
        np.random.seed(args.seed)
        env.seed(args.seed)

    nb_states = env.observation_space.shape[0]
    nb_actions = env.action_space.shape[0]

    agent = DDPG(nb_states, nb_actions, args)
    evaluate = Evaluator(args.validate_episodes,
                         args.validate_steps,
                         args.output,
                         max_episode_length=args.max_episode_length)

    if args.mode == 'train':
        train(args.train_iter,
              agent,
              env,
              evaluate,
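The helper get_output_folder is not shown in this example. Below is a minimal sketch consistent with the 'output/{env}-run0' naming used for args.resume above; the exact run-numbering behaviour is an assumption.

import os

def get_output_folder(parent_dir, env_name):
    # Return a fresh run directory such as parent_dir/env_name-run<N>,
    # where N is one past the highest existing run index, so repeated
    # experiments never overwrite each other (assumed behaviour).
    os.makedirs(parent_dir, exist_ok=True)
    experiment_id = 0
    for folder in os.listdir(parent_dir):
        if folder.startswith(env_name + '-run'):
            try:
                experiment_id = max(experiment_id,
                                    int(folder.split('-run')[-1]) + 1)
            except ValueError:
                continue
    output = os.path.join(parent_dir, '{}-run{}'.format(env_name, experiment_id))
    os.makedirs(output, exist_ok=True)
    return output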
Example #2
parser.add_argument('--exploration_noise', default=0.1, type=float)
parser.add_argument('--max_episode', default=10000, type=int) # num of games
parser.add_argument('--num_episode', default=0, type=int)
parser.add_argument('--print_log', default=5, type=int)
parser.add_argument('--update_iteration', default=200, type=int)
args = parser.parse_args()

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
print('env:', args.env_name)
print('seed:', args.random_seed)
script_name = os.path.basename(__file__)
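# float32 machine epsilon, used later to avoid division by zero when normalizing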
eps = np.finfo(np.float32).eps
env = NormalizedEnv(gym.make(args.env_name))

if args.seed:  # boolean flag defined with the arguments above (not shown); the value used is args.random_seed
    env.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    np.random.seed(args.random_seed)

state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])
min_Val = torch.tensor(1e-7).float().to(device)  # small positive floor to avoid numerical issues

directory = './exp' + script_name + 'Seed' + str(args.random_seed) + args.env_name + '/'

def normal_R_V(R_, current_Q, reward):
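    # Normalize the collected returns R_ and the (current_Q - reward) gap
    # to zero mean and unit variance; eps guards against division by zero.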
    R_ = np.array(R_)
    R_ = (R_ - R_.mean()) / (R_.std() + eps.item())
    value = (current_Q - reward).cpu().detach().numpy()
    value = ((value - value.mean()) / (value.std() + eps.item())).mean()
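All three examples wrap the raw environment in NormalizedEnv, whose definition is not shown. In many DDPG codebases it is a gym.ActionWrapper that maps the agent's [-1, 1] actions onto the environment's real action bounds; a minimal sketch under that assumption:

import gym

class NormalizedEnv(gym.ActionWrapper):
    # Rescale actions from [-1, 1] to [action_space.low, action_space.high].

    def action(self, action):
        low, high = self.action_space.low, self.action_space.high
        return (high + low) / 2.0 + (high - low) / 2.0 * action

    def reverse_action(self, action):
        # Inverse mapping, from environment bounds back to [-1, 1].
        low, high = self.action_space.low, self.action_space.high
        return 2.0 * (action - low) / (high - low) - 1.0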
Example #3
        print('Writing to {}'.format(args.output))

    writer = SummaryWriter(args.output)
    with open(os.path.join(args.output, 'cmdline.txt'), 'a') as f:
        f.write(' '.join(sys.argv) + '\n')

    bullet = ("Bullet" in args.env)
    if bullet:
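        # importing pybullet_envs registers the Bullet tasks with gym's registry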
        import pybullet
        import pybullet_envs
        
    env = NormalizedEnv(gym.make(args.env))

    # input random seed
    if args.seed > 0:
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        random.seed(args.seed)
        env.seed(args.seed)
        if args.cuda:
            torch.cuda.manual_seed(args.seed)

    # input status count & actions count
    print('observation_space', env.observation_space.shape, 'action_space', env.action_space.shape)
    nb_status = env.observation_space.shape[0]
    nb_actions = env.action_space.shape[0]
    
    agent = DDPG(nb_status, nb_actions, args, writer)

    train(args.train_iter, agent, env)
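All three snippets target the classic Gym API. In gym >= 0.26 (and in Gymnasium), env.seed() was removed; the equivalent seeding goes through reset:

# Newer Gym / Gymnasium replacement for env.seed(args.seed)
obs, info = env.reset(seed=args.seed)
env.action_space.seed(args.seed)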