Example #1
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda")
    parser.add_argument("-n", type=int, default=STEP_COUNT, help="Steps to do on Bellman unroll")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    env = gym.make(params.env_name)
    env = drl.common.wrappers.wrap_dqn(env)
    env.seed(common.SEED)
    input_shape = env.observation_space.shape
    n_actions = env.action_space.n

    selector = dac.EpsilonGreedySelector()
    eps_tracker = dac.EpsilonTracker(selector, params.epsilon_start, params.epsilon_final, params.epsilon_frames)
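    # eps_tracker anneals the selector's epsilon from epsilon_start down to epsilon_final
    # over the first epsilon_frames frames of training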

    net = dqn_model.DQN(input_shape, n_actions).to(device)
    agent = dag.DQNAgent(net, selector, device)
    tgt_net = dag.TargetNet(net)
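    # tgt_net holds a periodically synchronized copy of net, used for stable Bellman targets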

    buffer = dexp.ReplayBuffer(params.replay_size)
    exp_source = dexp.ExperienceSource(env, agent, buffer, args.n, params.gamma)
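    # exp_source plays the environment with the agent and feeds n-step (args.n) transitions
    # into the replay buffer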

    writer = SummaryWriter(comment="-" + params.env_name)
    print(net)

    optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)
    total_reward = []
    frame_idx = 0
    ts_frame = 0
    ts = time.time()
    best_m_reward = None
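The snippet stops right before the training loop. For orientation, the sketch below shows the core update such a loop applies to every batch sampled from the replay buffer: compute Q(s, a) with the online network, build the Bellman target with the target network, and minimize the squared error. The function, its batch layout, and the explicit gamma ** n factor (what the -n "Bellman unroll" argument controls) are illustrative assumptions, not code taken from the modules imported above.

    import torch
    import torch.nn as nn

    def dqn_loss(batch, net, tgt_net, gamma, n_steps=1):
        # batch is assumed to be tensors: states, taken actions, n-step rewards,
        # done flags, and the states observed after the n-step unroll
        states, actions, rewards, dones, next_states = batch
        # Q-values of the actions actually taken
        q_taken = net(states).gather(1, actions.unsqueeze(-1)).squeeze(-1)
        with torch.no_grad():
            # Bellman target uses the periodically synced target network
            next_q = tgt_net(next_states).max(dim=1)[0]
            next_q[dones] = 0.0
            target = rewards + (gamma ** n_steps) * next_q
        return nn.functional.mse_loss(q_taken, target)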
Example #2
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=False, action="store_true", help="Enable cuda")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    env = gym.make(params.env_name)
    env = drl.common.wrappers.wrap_dqn(env)
    env.seed(common.SEED)
    input_shape = env.observation_space.shape
    n_actions = env.action_space.n

    selector = dac.EpsilonGreedySelector()
    eps_tracker = dac.EpsilonTracker(selector, params.epsilon_start,
                                     params.epsilon_final,
                                     params.epsilon_frames)

    net = dqn_extra.DistributionDQN(input_shape, n_actions).to(device)
    agent = dag.DQNAgent(lambda x: net.qvals(x), selector, device)
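    # net.qvals() reduces the predicted return distributions to expected Q-values,
    # which is the form the epsilon-greedy selector needs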
    tgt_net = dag.TargetNet(net)

    buffer = dexp.ReplayBuffer(params.replay_size)
    exp_source = dexp.ExperienceSource(env, agent, buffer, 1, params.gamma)

    writer = SummaryWriter(comment="-" + params.env_name)
    print(net)

    optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)
    total_reward = []
    frame_idx = 0
    ts_frame = 0
    ts = time.time()
    best_m_reward = None
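Example #2 wraps the network in lambda x: net.qvals(x) because a distributional (C51-style) DQN outputs a probability distribution over a fixed support of return values rather than scalar Q-values, and the selector needs scalars to pick actions. The self-contained sketch below shows that reduction; the atom count, value range, and class layout are illustrative assumptions, not the actual dqn_extra.DistributionDQN implementation.

    import torch
    import torch.nn as nn

    N_ATOMS, V_MIN, V_MAX = 51, -10, 10   # illustrative values only

    class DistributionalHead(nn.Module):
        def __init__(self, in_features, n_actions):
            super().__init__()
            self.fc = nn.Linear(in_features, n_actions * N_ATOMS)
            # fixed support of possible returns, shared by all actions
            self.register_buffer("support", torch.linspace(V_MIN, V_MAX, N_ATOMS))

        def forward(self, x):
            # per-action probability distribution over the support atoms
            logits = self.fc(x).view(x.size(0), -1, N_ATOMS)
            return nn.functional.softmax(logits, dim=2)

        def qvals(self, x):
            # expected return per action: sum_i p_i * z_i
            return (self(x) * self.support).sum(dim=2)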