Esempio n. 1
0
             log_dir=os.path.join(args.log, 'movie'),
             record_video=args.record)
# Seed the wrapped environment with the command-line seed.
env.env.seed(args.seed)
# Optionally wrap the environment in C2DEnv.
# NOTE(review): presumably a continuous-to-discrete action wrapper -- confirm.
if args.c2d:
    env = C2DEnv(env)

observation_space, action_space = env.observation_space, env.action_space

# Policy network: plain PolNet by default, the LSTM variant when --rnn is set.
if not args.rnn:
    pol_net = PolNet(observation_space, action_space)
else:
    pol_net = PolNetLSTM(
        observation_space,
        action_space,
        h_size=256,
        cell_size=256,
    )
# Pick the policy wrapper that matches the type of the action space:
# GaussianPol for Box, CategoricalPol for Discrete.
# args.rnn is forwarded positionally to the wrapper, and parallel_dim is 1
# when the recurrent network is used and 0 otherwise.
# NOTE(review): presumably parallel_dim is the batch axis used for data
# parallelism (time-major input for the RNN case) -- confirm against the
# policy classes.
if isinstance(action_space, gym.spaces.Box):
    pol = GaussianPol(observation_space,
                      action_space,
                      pol_net,
                      args.rnn,
                      data_parallel=args.data_parallel,
                      parallel_dim=1 if args.rnn else 0)
elif isinstance(action_space, gym.spaces.Discrete):
    pol = CategoricalPol(observation_space,
                         action_space,
                         pol_net,
                         args.rnn,
                         data_parallel=args.data_parallel,
                         parallel_dim=1 if args.rnn else 0)
elif isinstance(action_space, gym.spaces.MultiDiscrete):
# Register the compute device (`device` is defined outside this excerpt).
set_device(device)

# Send the logger's tabular output to progress.csv in the log directory.
score_file = os.path.join(args.log, 'progress.csv')
logger.add_tabular_output(score_file)

# Build the environment; its log_dir is the 'movie' folder under args.log.
env = GymEnv(
    args.env_name,
    log_dir=os.path.join(args.log, 'movie'),
    record_video=args.record,
)
env.env.seed(args.seed)
# Optionally wrap the environment in C2DEnv.
if args.c2d:
    env = C2DEnv(env)

observation_space, action_space = env.observation_space, env.action_space

# Build the policy network, then wrap it in the policy class that matches
# the type of the environment's action space.
pol_net = PolNet(observation_space, action_space)
if isinstance(action_space, gym.spaces.Box):
    pol = GaussianPol(observation_space,
                      action_space,
                      pol_net,
                      data_parallel=args.data_parallel)
elif isinstance(action_space, gym.spaces.Discrete):
    pol = CategoricalPol(observation_space,
                         action_space,
                         pol_net,
                         data_parallel=args.data_parallel)
elif isinstance(action_space, gym.spaces.MultiDiscrete):
    pol = MultiCategoricalPol(observation_space,
                              action_space,
                              pol_net,
                              data_parallel=args.data_parallel)
else:
    # Fail here with a clear message instead of leaving `pol` unbound and
    # hitting an unrelated NameError at its first use.
    raise ValueError(
        'Unsupported action space {!r}: only Box, Discrete, and '
        'MultiDiscrete are supported'.format(action_space))
# A negative --cuda value selects the CPU; otherwise it is the CUDA index.
if args.cuda < 0:
    device_name = 'cpu'
else:
    device_name = "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

# Send the logger's tabular output to progress.csv in the log directory.
score_file = os.path.join(args.log, 'progress.csv')
logger.add_tabular_output(score_file)

# Build the environment; its log_dir is the 'movie' folder under args.log.
env = GymEnv(
    args.env_name,
    log_dir=os.path.join(args.log, 'movie'),
    record_video=args.record,
)
env.env.seed(args.seed)

observation_space, action_space = env.observation_space, env.action_space

# Online policy: a PolNet wrapped in a Gaussian policy.
pol_net = PolNet(observation_space, action_space)
pol = GaussianPol(observation_space, action_space, pol_net)

# Target policy: a second PolNet of the same construction whose parameters
# are overwritten with a copy of the online network's state dict, so both
# start from identical weights.
targ_pol_net = PolNet(observation_space, action_space)
targ_pol_net.load_state_dict(pol_net.state_dict())
targ_pol = GaussianPol(observation_space, action_space, targ_pol_net)

# Online Q-function: a QNet wrapped as a deterministic state-action value
# function.
qf_net = QNet(observation_space, action_space)
qf = DeterministicSAVfunc(observation_space, action_space, qf_net)

# Target Q-function: a second QNet initialised from the ONLINE network's
# parameters so that both critics start identical. Loading the target's own
# state dict (as the code previously did) is a no-op and would leave the
# target with independent random weights.
targ_qf_net = QNet(observation_space, action_space)
targ_qf_net.load_state_dict(qf_net.state_dict())
targ_qf = DeterministicSAVfunc(observation_space, action_space, targ_qf_net)


# Sampler built from the environment and the online policy;
# args.num_parallel is passed positionally and the command-line seed by
# keyword.
sampler = EpiSampler(
    env,
    pol,
    args.num_parallel,
    seed=args.seed,
)