Example #1
    selector = ptan.actions.EpsilonGreedyActionSelector(epsilon=params.epsilon_start)
    epsilon_tracker = utils.EpsilonTracker(selector, params)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    target_net = ptan.agent.TargetNet(net)

    exp_source = ptan.experience.ExperienceSourceFirstLast(env, agent, gamma=params.gamma)
    buffer = ptan.experience.ExperienceReplayBuffer(exp_source, buffer_size=params.replay_size)
    optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)


    def process_batch(engine_for_batch, batch):
        optimizer.zero_grad()

        loss_v = utils.calc_loss_dqn(batch, net, target_net.target_model, gamma=params.gamma, device=device)
        loss_v.backward()

        optimizer.step()
        epsilon_tracker.frame(engine_for_batch.state.iteration)

        if engine_for_batch.state.iteration % params.target_net_sync == 0:
            target_net.sync()

        return {
            "loss": loss_v.item(),
            "epsilon": selector.epsilon,
        }


    engine = Engine(process_batch)
    utils.setup_ignite(engine, params, exp_source, "01_DQN_Baseline")
    engine.run(utils.batch_generator(buffer, params.replay_initial, params.batch_size))
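
All of these examples feed the Ignite Engine from utils.batch_generator, which is never shown in the snippets. A minimal sketch of what such a generator looks like, assuming ptan's replay-buffer API (populate/sample) and a warm-up phase of replay_initial transitions before the first batch is yielded:

import ptan

def batch_generator(buffer, initial, batch_size):
    # Fill the replay buffer before training starts, then keep it
    # growing by one fresh transition per training batch.
    buffer.populate(initial)
    while True:
        buffer.populate(1)
        yield buffer.sample(batch_size)

Because the generator is infinite, engine.run() trains until one of the handlers installed by utils.setup_ignite (not shown) terminates it.
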
Example #2
    def process_batch(engine_for_batch, batch_data):
        batch, batch_indices, batch_weights = batch_data

        optimizer.zero_grad()

        loss_v, sample_priority = calc_loss(batch,
                                            batch_weights,
                                            net,
                                            target_net.target_model,
                                            gamma=params.gamma,
                                            _device=str(device))
        loss_v.backward()

        optimizer.step()
        buffer.update_priorities(batch_indices, sample_priority)
        epsilon_tracker.frame(engine_for_batch.state.iteration)

        if engine_for_batch.state.iteration % params.target_net_sync == 0:
            target_net.sync()

        return {
            "loss": loss_v.item(),
            "epsilon": selector.epsilon,
            "beta": buffer.update_beta(engine_for_batch.state.iteration),
        }

    engine = Engine(process_batch)
    utils.setup_ignite(engine, params, exp_source, "05_DQN_PER")
    engine.run(
        utils.batch_generator(buffer, params.replay_initial,
                              params.batch_size))
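
The PER example delegates to a calc_loss helper that is not shown. A hedged sketch of what a prioritized-replay DQN loss typically looks like: the batch is unpacked from ptan ExperienceFirstLast transitions, importance-sampling weights scale each sample's squared TD error, and the per-sample losses are returned as new priorities. The unpacking details and names here are assumptions, not the snippet's actual implementation:

import numpy as np
import torch

def calc_loss(batch, batch_weights, net, tgt_net, gamma, _device="cpu"):
    # Unpack ptan ExperienceFirstLast transitions (assumed batch format).
    states = torch.as_tensor(np.asarray([t.state for t in batch])).to(_device)
    actions = torch.as_tensor([t.action for t in batch]).to(_device)
    rewards = torch.as_tensor([t.reward for t in batch],
                              dtype=torch.float32).to(_device)
    dones = torch.as_tensor([t.last_state is None for t in batch],
                            dtype=torch.bool).to(_device)
    next_states = torch.as_tensor(np.asarray(
        [t.state if t.last_state is None else t.last_state
         for t in batch])).to(_device)
    weights = torch.as_tensor(batch_weights, dtype=torch.float32).to(_device)

    q_taken = net(states).gather(1, actions.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        next_q = tgt_net(next_states).max(1)[0]
        next_q[dones] = 0.0
        target = rewards + gamma * next_q
    # Importance-sampling weights correct the bias of prioritized sampling.
    losses = weights * (q_taken - target) ** 2
    # Per-sample loss (plus a small constant) becomes the new priority.
    return losses.mean(), (losses + 1e-5).detach().cpu().numpy()
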
Example #3
    def process_batch(engine_for_batch, batch):
        optimizer.zero_grad()

        loss_v = utils.calc_loss_dqn(batch, net, target_net.target_model, gamma=params.gamma, device=device)
        loss_v.backward()

        optimizer.step()
        epsilon_tracker.frame(engine_for_batch.state.iteration)

        if engine_for_batch.state.iteration % params.target_net_sync == 0:
            target_net.sync()

        if engine_for_batch.state.iteration % EVAL_EVERY_FRAME == 0:
            eval_states = getattr(engine_for_batch.state, "eval_states", None)

            if eval_states is None:
                eval_states = buffer.sample(STATES_TO_EVALUATE)
                eval_states = [np.array(transition.state, copy=False) for transition in eval_states]
                eval_states = np.array(eval_states, copy=False)
                engine_for_batch.state.eval_states = eval_states

            evaluate_states(eval_states, net, device, engine_for_batch)

        return {
            "loss": loss_v.item(),
            "epsilon": selector.epsilon,
        }


    engine = Engine(process_batch)
    utils.setup_ignite(engine, params, exp_source, "02_DQN_Dueling")
    engine.run(utils.batch_generator(buffer, params.replay_initial, params.batch_size))
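
The evaluate_states function is referenced above but never defined. A plausible sketch, following the same convention as Example #6 below, where the mean best-action value over a fixed set of held-out states is published as a "values" metric (the signature is the one the snippet calls; the body is an assumption):

import torch

@torch.no_grad()
def evaluate_states(states, net, device, engine):
    # Mean max-Q over a fixed batch of states: a smoother progress
    # signal than per-episode reward.
    states_v = torch.as_tensor(states).to(device)
    best_values = net(states_v).max(1)[0].mean().item()
    engine.state.metrics["values"] = best_values
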
Example #4
    def process_batch(engine_for_batch, batch_data):
        batch, batch_indices, batch_weights = batch_data

        optimizer.zero_grad()

        loss_v, sample_prios = calc_loss_prio(batch,
                                              batch_weights,
                                              net,
                                              target_net.target_model,
                                              gamma=params.gamma**N_STEPS,
                                              _device=device)
        loss_v.backward()

        optimizer.step()
        buffer.update_priorities(batch_indices, sample_prios)

        if engine_for_batch.state.iteration % params.target_net_sync == 0:
            target_net.sync()

        return {
            "loss": loss_v.item(),
            "beta": buffer.update_beta(engine_for_batch.state.iteration),
        }

    engine = Engine(process_batch)
    utils.setup_ignite(engine, params, exp_source, "04_DQN_Rainbow")
    engine.run(
        utils.batch_generator(buffer, params.replay_initial,
                              params.batch_size))
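
buffer.update_priorities and buffer.update_beta are not part of ptan's stock ExperienceReplayBuffer, so the Rainbow and PER examples assume a prioritized buffer wrapper. A sketch of the beta-annealing method under an assumed linear schedule (BETA_START and BETA_FRAMES are illustrative values, not taken from the snippets):

BETA_START = 0.4       # assumed initial importance-sampling exponent
BETA_FRAMES = 100_000  # assumed annealing horizon, in iterations

class PrioReplayBuffer:
    beta = BETA_START

    def update_beta(self, idx):
        # Anneal beta linearly toward 1.0 so that late-training updates
        # fully compensate for the non-uniform sampling distribution.
        self.beta = min(1.0, BETA_START + idx * (1.0 - BETA_START) / BETA_FRAMES)
        return self.beta
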
Example #5
    buffer = ptan.experience.ExperienceReplayBuffer(
        exp_source, buffer_size=params.replay_size)
    optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)

    def process_batch(engine_for_batch, batch):
        optimizer.zero_grad()

        loss_v = calc_loss(batch,
                           net,
                           target_net.target_model,
                           gamma=params.gamma,
                           _device=device)
        loss_v.backward()

        optimizer.step()
        epsilon_tracker.frame(engine_for_batch.state.iteration)

        if engine_for_batch.state.iteration % params.target_net_sync == 0:
            target_net.sync()

        return {
            "loss": loss_v.item(),
            "epsilon": selector.epsilon,
        }

    engine = Engine(process_batch)
    utils.setup_ignite(engine, params, exp_source, "07_DQN_Categorical")
    engine.run(
        utils.batch_generator(buffer, params.replay_initial,
                              params.batch_size))
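
The categorical (C51) example hides everything distributional inside its calc_loss. The key idea is that the network outputs a probability distribution over N_ATOMS fixed return values, and Q-values are recovered as the expectation over that support. A sketch using the standard C51 constants (assumed here, not read from the snippet):

import torch
import torch.nn.functional as F

Vmin, Vmax, N_ATOMS = -10.0, 10.0, 51  # standard C51 support

def q_values_from_distribution(logits):
    # logits: (batch, n_actions, N_ATOMS).
    # Q(s, a) = sum_i p(z_i | s, a) * z_i, the expected return.
    supports = torch.linspace(Vmin, Vmax, N_ATOMS)
    probs = F.softmax(logits, dim=2)
    return (probs * supports).sum(dim=2)
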
Example #6
    def process_batch(engine_for_batch, batch):
        optimizer.zero_grad()

        # NOTE: reconstructed down to this point; the original snippet
        # starts mid-function. The run name suggests a double-DQN loss
        # toggled by args.double; the exact loss call is not shown.
        loss_v = calc_loss(batch, net, target_net.target_model,
                           gamma=params.gamma, device=device)
        loss_v.backward()

        optimizer.step()
        epsilon_tracker.frame(engine_for_batch.state.iteration)

        if engine_for_batch.state.iteration % params.target_net_sync == 0:
            target_net.sync()

        if engine_for_batch.state.iteration % EVAL_EVERY_FRAME == 0:
            eval_states = getattr(engine_for_batch.state, "eval_states", None)

            if eval_states is None:
                eval_states = buffer.sample(STATES_TO_EVALUATE)
                eval_states = [
                    np.array(transition.state, copy=False)
                    for transition in eval_states
                ]
                eval_states = np.array(eval_states, copy=False)
                engine_for_batch.state.eval_states = eval_states

            engine_for_batch.state.metrics[
                "values"] = utils.calc_values_of_states(
                    eval_states, net, device)

        return {
            "loss": loss_v.item(),
            "epsilon": selector.epsilon,
        }

    engine = Engine(process_batch)
    utils.setup_ignite(engine,
                       params,
                       exp_source,
                       f"03_DQN_Double={args.double}",
                       extra_metrics=('values', ))
    engine.run(
        utils.batch_generator(buffer, params.replay_initial,
                              params.batch_size))
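
Example #6's loss computation is cut off in the source, but the run name indicates the double-DQN variant selected by args.double. The essential difference lies in how the bootstrap target is built; a sketch of the double-DQN target (the helper name and signature are illustrative):

import torch

@torch.no_grad()
def double_dqn_targets(next_states_v, rewards_v, dones_mask, net, tgt_net, gamma):
    # Double DQN decouples action selection from action evaluation:
    # the online net chooses the argmax action, the target net scores it.
    next_actions = net(next_states_v).max(1)[1]
    next_q = tgt_net(next_states_v).gather(
        1, next_actions.unsqueeze(-1)).squeeze(-1)
    next_q[dones_mask] = 0.0
    return rewards_v + gamma * next_q
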
Example #7
    buffer = ptan.experience.ExperienceReplayBuffer(
        exp_source, buffer_size=params.replay_size)
    optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)

    def process_batch(engine_for_batch, batch):
        optimizer.zero_grad()

        loss_v = utils.calc_loss_dqn(batch,
                                     net,
                                     target_net.target_model,
                                     gamma=params.gamma**args.n,
                                     device=device)
        loss_v.backward()

        optimizer.step()
        epsilon_tracker.frame(engine_for_batch.state.iteration)

        if engine_for_batch.state.iteration % params.target_net_sync == 0:
            target_net.sync()

        return {
            "loss": loss_v.item(),
            "epsilon": selector.epsilon,
        }

    engine = Engine(process_batch)
    utils.setup_ignite(engine, params, exp_source, f"02_N_Step={args.n}")
    engine.run(
        utils.batch_generator(buffer, params.replay_initial,
                              params.batch_size))
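
The gamma**args.n in the loss call only makes sense if the experience source already accumulates n-step returns. In ptan that is the steps_count argument of ExperienceSourceFirstLast; the setup elided from this snippet presumably looks like:

# R = r_0 + gamma*r_1 + ... + gamma^(n-1)*r_{n-1} is folded into each
# transition's reward, so the Bellman backup bootstraps with gamma**n.
exp_source = ptan.experience.ExperienceSourceFirstLast(
    env, agent, gamma=params.gamma, steps_count=args.n)
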
Example #8
    def process_batch(engine_for_batch, batch):
        optimizer.zero_grad()

        loss_v = utils.calc_loss_dqn(batch,
                                     net,
                                     target_net.target_model,
                                     gamma=params.gamma,
                                     device=device)
        loss_v.backward()

        optimizer.step()

        if engine_for_batch.state.iteration % params.target_net_sync == 0:
            target_net.sync()

        if engine_for_batch.state.iteration % NOISY_SNR_EVERY_ITERS == 0:
            for layer_idx, sigma_l2 in enumerate(net.noisy_layers_sigma_snr()):
                engine_for_batch.state.metrics[
                    f'snr_{layer_idx + 1}'] = sigma_l2

        return {
            "loss": loss_v.item(),
        }

    engine = Engine(process_batch)
    utils.setup_ignite(engine,
                       params,
                       exp_source,
                       "04_DQN_NoisyNetwork",
                       extra_metrics=('snr_1', 'snr_2'))
    engine.run(
        utils.batch_generator(buffer, params.replay_initial,
                              params.batch_size))
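
net.noisy_layers_sigma_snr() is a method of the network, not shown here. A sketch assuming book-style NoisyLinear layers, where each layer learns both a weight and a sigma_weight noise scale; the metric is the RMS ratio of the two:

import torch

def noisy_layers_sigma_snr(self):
    # Signal-to-noise ratio per noisy layer: RMS(weight) / RMS(sigma).
    # A rising ratio means the layer increasingly ignores its
    # exploration noise as training converges.
    return [
        ((layer.weight ** 2).mean().sqrt()
         / (layer.sigma_weight ** 2).mean().sqrt()).item()
        for layer in self.noisy_layers
    ]
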