예제 #1
0
                    help='number of episodes (default: 128)')
# Remaining command-line options; `parser` is created earlier in the file.
parser.add_argument('--updates_per_step',
                    type=int,
                    default=5,
                    metavar='N',
                    help='model updates per simulator step (default: 5)')
parser.add_argument('--num-stack',
                    type=int,
                    default=1,
                    help='number of frames to stack')
# NOTE(review): the help text talks about resuming training while the option
# is a free-form suffix string -- confirm the wording against its consumer.
parser.add_argument('--model-suffix',
                    default="",
                    help='To resume training or not')
args = parser.parse_args()

# Build the gym environment and wrap it in the project's NormalizedActions
# wrapper (defined outside this excerpt).
env = NormalizedActions(gym.make(args.env_name))

writer = SummaryWriter()

# Seed every random number generator from the same user-supplied seed.
env.seed(args.seed)
# torch.manual_seed must run on every machine: it seeds the CPU generator,
# which torch.cuda.manual_seed does not touch. Previously it was only called
# when CUDA was unavailable, leaving CPU-side sampling unseeded on GPU hosts.
torch.manual_seed(args.seed)
np.random.seed(args.seed)

if torch.cuda.is_available():
    device = torch.device("cuda:0")
    torch.cuda.manual_seed(args.seed)
else:
    device = torch.device("cpu")

# Frame stacking multiplies the first observation dimension by --num-stack;
# any remaining dimensions are kept unchanged.
obs_shape = env.observation_space.shape
obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:])
예제 #2
0
parser.set_defaults(done_util=True)

# Paired on/off switches writing to the same destination; both default to off.
parser.add_argument('--render', dest='render', action='store_true')
parser.add_argument('--no_render', dest='render', action='store_false')
parser.set_defaults(render=False)

parser.add_argument('--record', dest='record', action='store_true')
parser.add_argument('--no-record', dest='record', action='store_false')
parser.set_defaults(record=False)
args = parser.parse_args()

if __name__ == '__main__':

    env_name = args.env
    env_factory = envs.env_list[env_name]
    try:
        env = NormalizedActions(env_factory(render=args.render))
    except TypeError:
        # The constructor rejected the `render` keyword; build it without.
        print('no argument render,  assumping env.render will just work')
        env = NormalizedActions(env_factory())

    # Every action bound must lie within [-1, 1]. np.all is required here:
    # np.any would accept a space in which only one dimension is normalized.
    assert np.all(np.abs(env.action_space.low) <= 1.) and np.all(
        np.abs(env.action_space.high) <= 1.), 'Action space not normalizd'

    if args.record:
        # Record videos under a directory keyed by env name and args.frame;
        # force=True lets the monitor overwrite a previous recording.
        env = gym.wrappers.Monitor(env,
                                   './data/vid/mpc/{}-{}'.format(
                                       env_name, args.frame),
                                   force=True)

    # Seed before the first reset so the initial state is drawn from the
    # seeded generator; seeding afterwards left that first state unseeded.
    env.seed(args.seed)
    env.reset()
예제 #3
0
# Enable GPU mode when a non-negative device index was requested.
if args.gpu >= 0:
    print("gpu ok")
    ptu.set_gpu_mode(True, args.gpu)
# set env
if args.env_name == 'Humanoidrllab':
    # rllab is imported lazily so it is only required for this environment.
    from rllab.envs.mujoco.humanoid_env import HumanoidEnv
    from rllab.envs.normalized_env import normalize
    env = normalize(HumanoidEnv())
    max_episode_steps = float('inf')
    if args.seed >= 0:
        # NOTE(review): this only binds a module-level name `seed_`; nothing
        # visible here hands the seed to the rllab environment -- confirm it
        # is consumed elsewhere.
        global seed_
        seed_ = args.seed
else:
    env = gym.make(args.env_name)
    # Read the step limit before wrapping, from the object gym.make returned.
    max_episode_steps = env._max_episode_steps
    env = NormalizedActions(env)
    if args.seed >= 0:
        env.seed(args.seed)
# A negative seed means "do not seed"; otherwise seed every RNG in use.
if args.seed >= 0:
    # torch.manual_seed and torch.random.manual_seed are the same function,
    # so a single call suffices (the original called both with the same seed).
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(args.seed)
        # Trade cuDNN autotuning for deterministic kernel selection.
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

# set args
# Derive the action dimensionality and upper bounds from the environment.
args.num_actions = env.action_space.shape[0]
args.max_action = env.action_space.high
예제 #4
0
파일: main.py 프로젝트: Liavbapp/NAF_torch
if __name__ == '__main__':
    # Short command-line keys mapped to their configuration dicts, which are
    # defined elsewhere in this file.
    presets = {'mc': args_mc, 'pd': args_pd, 'll': args_ll}

    # A missing argument is treated like an unknown key so the usage message
    # is printed instead of an IndexError traceback.
    env_key = sys.argv[1] if len(sys.argv) > 1 else None
    args = presets.get(env_key)
    if args is None:
        print('Environment not selected, Please choose from: mc, pd,ll')
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. under `python -S`).
        sys.exit(-1)

    env = NormalizedActions(gym.make(args['env_name']))

    # Seed the environment, torch and numpy from the configured seed.
    env.seed(args['seed'])
    torch.manual_seed(args['seed'])
    np.random.seed(args['seed'])

    agent = NAF(args['gamma'], args['tau'], args['hidden_size'],
                env.observation_space.shape[0], env.action_space)
    # Load saved weights for this environment from the models/ directory.
    agent.load_model(f'models/naf_{args["env_name"]}')

    replay_buffer = ReplayBuffer(args['replay_size'])

    # Exploration noise is optional and controlled by the 'ou_noise' entry.
    ounoise = OUNoise(env.action_space.shape[0]) if args['ou_noise'] else None

    # NOTE(review): run() takes no arguments, so it presumably reads the
    # module-level names bound above (env, args, agent, ...) -- confirm.
    run()