log_dir=os.path.join(args.log, 'movie'), record_video=args.record) env.env.seed(args.seed) if args.c2d: env = C2DEnv(env) observation_space = env.observation_space action_space = env.action_space if args.rnn: pol_net = PolNetLSTM(observation_space, action_space, h_size=256, cell_size=256) else: pol_net = PolNet(observation_space, action_space) if isinstance(action_space, gym.spaces.Box): pol = GaussianPol(observation_space, action_space, pol_net, args.rnn, data_parallel=args.data_parallel, parallel_dim=1 if args.rnn else 0) elif isinstance(action_space, gym.spaces.Discrete): pol = CategoricalPol(observation_space, action_space, pol_net, args.rnn, data_parallel=args.data_parallel, parallel_dim=1 if args.rnn else 0) elif isinstance(action_space, gym.spaces.MultiDiscrete):
# Hand the already-selected device to set_device and send tabular progress
# logs to <args.log>/progress.csv.
set_device(device)
score_file = os.path.join(args.log, 'progress.csv')
logger.add_tabular_output(score_file)

# Build the environment; the 'movie' sub-directory of the log directory is
# passed as the env's log_dir, and video recording follows args.record.
movie_dir = os.path.join(args.log, 'movie')
env = GymEnv(
    args.env_name,
    log_dir=movie_dir,
    record_video=args.record,
)
env.env.seed(args.seed)

# Optionally wrap the env in C2DEnv (presumably a continuous-to-discrete
# action wrapper -- confirm against its definition).
if args.c2d:
    env = C2DEnv(env)

# Read the spaces only after the optional wrapping so that they describe
# the env actually used from here on.
observation_space = env.observation_space
action_space = env.action_space

pol_net = PolNet(observation_space, action_space)

# Choose the policy class that matches the type of the action space.
if isinstance(action_space, gym.spaces.Box):
    pol = GaussianPol(
        observation_space,
        action_space,
        pol_net,
        data_parallel=args.data_parallel,
    )
elif isinstance(action_space, gym.spaces.Discrete):
    pol = CategoricalPol(
        observation_space,
        action_space,
        pol_net,
        data_parallel=args.data_parallel,
    )
elif isinstance(action_space, gym.spaces.MultiDiscrete):
    pol = MultiCategoricalPol(
        observation_space,
        action_space,
        pol_net,
        data_parallel=args.data_parallel,
    )
# Select the compute device: CPU when args.cuda is negative, otherwise the
# CUDA device with that index.
device_name = 'cpu' if args.cuda < 0 else "cuda:{}".format(args.cuda)
device = torch.device(device_name)
set_device(device)

# Send tabular progress logs to <args.log>/progress.csv.
score_file = os.path.join(args.log, 'progress.csv')
logger.add_tabular_output(score_file)

# Build and seed the environment; the 'movie' sub-directory of the log
# directory is passed as the env's log_dir.
env = GymEnv(
    args.env_name,
    log_dir=os.path.join(args.log, 'movie'),
    record_video=args.record,
)
env.env.seed(args.seed)

observation_space = env.observation_space
action_space = env.action_space

# Online policy.
pol_net = PolNet(observation_space, action_space)
pol = GaussianPol(observation_space, action_space, pol_net)

# Target policy: same architecture, initialised with the online policy's
# weights so both start out identical.
targ_pol_net = PolNet(observation_space, action_space)
targ_pol_net.load_state_dict(pol_net.state_dict())
targ_pol = GaussianPol(observation_space, action_space, targ_pol_net)

# Online Q-function.
qf_net = QNet(observation_space, action_space)
qf = DeterministicSAVfunc(observation_space, action_space, qf_net)

# Target Q-function: must start as a copy of the online Q-network, mirroring
# the target-policy initialisation above. Loading targ_qf_net's own state
# dict would be a no-op and leave the target with unrelated random weights.
targ_qf_net = QNet(observation_space, action_space)
targ_qf_net.load_state_dict(qf_net.state_dict())
targ_qf = DeterministicSAVfunc(observation_space, action_space, targ_qf_net)

# Episode sampler that collects rollouts with the online policy.
sampler = EpiSampler(env, pol, args.num_parallel, seed=args.seed)