parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda") parser.add_argument("-n", type=int, default=STEP_COUNT, help="Steps to do on Bellman unroll") args = parser.parse_args() device = torch.device("cuda" if args.cuda else "cpu") env = gym.make(params.env_name) env = drl.common.wrappers.wrap_dqn(env) env.seed(common.SEED) input_shape = env.observation_space.shape n_actions = env.action_space.n selector = dac.EpsilonGreedySelector() eps_tracker = dac.EpsilonTracker(selector, params.epsilon_start, params.epsilon_final, params.epsilon_frames) net = dqn_model.DQN(input_shape, n_actions).to(device) agent = dag.DQNAgent(net, selector, device) tgt_net = dag.TargetNet(net) buffer = dexp.ReplayBuffer(params.replay_size) exp_source = dexp.ExperienceSource(env, agent, buffer, args.n, params.gamma) writer = SummaryWriter(comment="-" + params.env_name) print(net) optimizer = optim.Adam(net.parameters(), lr=params.learning_rate) total_reward = [] frame_idx = 0 ts_frame = 0 ts = time.time() best_m_reward = None
args = parser.parse_args()
device = torch.device("cuda" if args.cuda else "cpu")

# environment with the standard Atari wrappers applied
env = gym.make(params.env_name)
env = drl.common.wrappers.wrap_dqn(env)
env.seed(common.SEED)
input_shape = env.observation_space.shape
n_actions = env.action_space.n

# epsilon-greedy action selection with linearly decaying epsilon
selector = dac.EpsilonGreedySelector()
eps_tracker = dac.EpsilonTracker(selector, params.epsilon_start,
                                 params.epsilon_final, params.epsilon_frames)
net = dqn_extra.DistributionDQN(input_shape, n_actions).to(device)
# the selector needs scalar Q-values, so the agent is given qvals()
# rather than the raw per-atom distribution
agent = dag.DQNAgent(lambda x: net.qvals(x), selector, device)
tgt_net = dag.TargetNet(net)
buffer = dexp.ReplayBuffer(params.replay_size)
# plain 1-step unroll for the distributional variant
exp_source = dexp.ExperienceSource(env, agent, buffer, 1, params.gamma)

writer = SummaryWriter(comment="-" + params.env_name)
print(net)
optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)
total_reward = []
frame_idx = 0
ts_frame = 0
ts = time.time()
best_m_reward = None
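# DistributionDQN is assumed to return, per action, a probability
# distribution over a fixed support of atoms (C51-style); net.qvals(x)
# presumably reduces that distribution to scalar Q-values for the
# epsilon-greedy selector. A minimal sketch of that reduction, with the
# usual C51 defaults as assumed parameters (the real values live inside
# DistributionDQN):
def qvals_from_distribution(logits, v_min=-10.0, v_max=10.0, n_atoms=51):
    # logits: (batch, n_actions, n_atoms) raw outputs of the head
    support = torch.linspace(v_min, v_max, n_atoms, device=logits.device)
    probs = torch.softmax(logits, dim=-1)   # per-action distribution over atoms
    return (probs * support).sum(dim=-1)    # Q(s, a) = E[Z(s, a)]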