selector = ptan.actions.EpsilonGreedyActionSelector(
    epsilon=params.epsilon_start)
epsilon_tracker = utils.EpsilonTracker(selector, params)
agent = ptan.agent.DQNAgent(net, selector, device=device)
exp_source = ptan.experience.ExperienceSourceFirstLast(
    env, agent, gamma=params.gamma)
buffer = ptan.experience.ExperienceReplayBuffer(
    exp_source, buffer_size=params.replay_size)
optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)

def process_batch(engine_for_batch, batch):
    optimizer.zero_grad()
    loss_v = utils.calc_loss_dqn(batch, net, target_net.target_model,
                                 gamma=params.gamma, device=device)
    loss_v.backward()
    optimizer.step()
    epsilon_tracker.frame(engine_for_batch.state.iteration)
    if engine_for_batch.state.iteration % params.target_net_sync == 0:
        target_net.sync()
    return {
        "loss": loss_v.item(),
        "epsilon": selector.epsilon,
    }

engine = Engine(process_batch)
utils.setup_ignite(engine, params, exp_source, "01_DQN_Baseline")
engine.run(utils.batch_generator(buffer, params.replay_initial,
                                 params.batch_size))
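The loss helper itself is not shown in this listing. For reference, here is a minimal sketch of what utils.calc_loss_dqn, together with the batch-unpacking helper it relies on, might look like; the name unpack_batch and the exact tensor handling are assumptions, not necessarily the module's actual code:

import numpy as np
import torch
import torch.nn as nn

def unpack_batch(batch):
    # batch is a list of ptan ExperienceFirstLast entries
    # (state, action, reward, last_state); last_state is None when
    # the episode ended inside the transition.
    states, actions, rewards, dones, last_states = [], [], [], [], []
    for exp in batch:
        states.append(np.array(exp.state, copy=False))
        actions.append(exp.action)
        rewards.append(exp.reward)
        dones.append(exp.last_state is None)
        # keep the array shape consistent for terminal transitions;
        # their bootstrap value is masked out later anyway
        last_states.append(np.array(
            exp.state if exp.last_state is None else exp.last_state,
            copy=False))
    return (np.array(states, copy=False), np.array(actions),
            np.array(rewards, dtype=np.float32),
            np.array(dones, dtype=bool),
            np.array(last_states, copy=False))

def calc_loss_dqn(batch, net, tgt_net, gamma, device="cpu"):
    states, actions, rewards, dones, next_states = unpack_batch(batch)
    states_v = torch.as_tensor(states).to(device)
    next_states_v = torch.as_tensor(next_states).to(device)
    actions_v = torch.as_tensor(actions).to(device)
    rewards_v = torch.as_tensor(rewards).to(device)
    done_mask = torch.as_tensor(dones).to(device)

    # Q(s, a) of the actions actually taken in the batch
    state_action_vals = net(states_v).gather(
        1, actions_v.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        # bootstrap from the frozen target net; terminal states
        # contribute nothing beyond their immediate reward
        next_state_vals = tgt_net(next_states_v).max(1)[0]
        next_state_vals[done_mask] = 0.0
    bellman_vals = next_state_vals * gamma + rewards_v
    return nn.MSELoss()(state_action_vals, bellman_vals)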
def process_batch(engine_for_batch, batch_data):
    batch, batch_indices, batch_weights = batch_data
    optimizer.zero_grad()
    loss_v, sample_priority = calc_loss(
        batch, batch_weights, net, target_net.target_model,
        gamma=params.gamma, device=device)
    loss_v.backward()
    optimizer.step()
    buffer.update_priorities(batch_indices, sample_priority)
    epsilon_tracker.frame(engine_for_batch.state.iteration)
    if engine_for_batch.state.iteration % params.target_net_sync == 0:
        target_net.sync()
    return {
        "loss": loss_v.item(),
        "epsilon": selector.epsilon,
        "beta": buffer.update_beta(engine_for_batch.state.iteration),
    }

engine = Engine(process_batch)
utils.setup_ignite(engine, params, exp_source, "05_DQN_PER")
engine.run(utils.batch_generator(buffer, params.replay_initial,
                                 params.batch_size))
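calc_loss here is the prioritized-replay variant: it has to consume the importance-sampling weights from the buffer and hand back per-sample priorities for buffer.update_priorities. A possible sketch, reusing the hypothetical unpack_batch helper from the baseline sketch above:

import torch

def calc_loss(batch, batch_weights, net, tgt_net, gamma, device="cpu"):
    states, actions, rewards, dones, next_states = unpack_batch(batch)
    states_v = torch.as_tensor(states).to(device)
    next_states_v = torch.as_tensor(next_states).to(device)
    actions_v = torch.as_tensor(actions).to(device)
    rewards_v = torch.as_tensor(rewards).to(device)
    done_mask = torch.as_tensor(dones).to(device)
    batch_weights_v = torch.as_tensor(batch_weights).to(device)

    state_action_vals = net(states_v).gather(
        1, actions_v.unsqueeze(-1)).squeeze(-1)
    with torch.no_grad():
        next_state_vals = tgt_net(next_states_v).max(1)[0]
        next_state_vals[done_mask] = 0.0
        exp_sa_vals = next_state_vals * gamma + rewards_v
    # per-sample squared TD errors, scaled by the importance-sampling
    # weights supplied by the prioritized buffer
    losses_v = batch_weights_v * (state_action_vals - exp_sa_vals) ** 2
    # mean loss for the optimizer; raw losses (plus a small constant
    # so no priority can collapse to zero) become the new priorities
    return losses_v.mean(), (losses_v + 1e-5).data.cpu().numpy()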
def process_batch(engine_for_batch, batch):
    optimizer.zero_grad()
    loss_v = utils.calc_loss_dqn(batch, net, target_net.target_model,
                                 gamma=params.gamma, device=device)
    loss_v.backward()
    optimizer.step()
    epsilon_tracker.frame(engine_for_batch.state.iteration)
    if engine_for_batch.state.iteration % params.target_net_sync == 0:
        target_net.sync()
    if engine_for_batch.state.iteration % EVAL_EVERY_FRAME == 0:
        eval_states = getattr(engine_for_batch.state, "eval_states", None)
        if eval_states is None:
            eval_states = buffer.sample(STATES_TO_EVALUATE)
            eval_states = [np.array(transition.state, copy=False)
                           for transition in eval_states]
            eval_states = np.array(eval_states, copy=False)
            engine_for_batch.state.eval_states = eval_states
        evaluate_states(eval_states, net, device, engine_for_batch)
    return {
        "loss": loss_v.item(),
        "epsilon": selector.epsilon,
    }

engine = Engine(process_batch)
utils.setup_ignite(engine, params, exp_source, "02_DQN_Dueling")
engine.run(utils.batch_generator(buffer, params.replay_initial,
                                 params.batch_size))
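evaluate_states is defined elsewhere. One plausible implementation, assuming it writes the mean best Q-value over the held-out states into the engine's metrics (the split into 64 chunks is an assumption to bound GPU memory, not the module's confirmed behavior):

import numpy as np
import torch

def evaluate_states(states, net, device, engine):
    # process the held-out states in chunks to bound GPU memory
    mean_vals = []
    for chunk in np.array_split(states, 64):
        states_v = torch.as_tensor(chunk).to(device)
        with torch.no_grad():
            action_values_v = net(states_v)
        # best Q-value per state, averaged over the chunk
        best_action_values_v = action_values_v.max(1)[0]
        mean_vals.append(best_action_values_v.mean().item())
    engine.state.metrics["values"] = float(np.mean(mean_vals))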
def process_batch(engine_for_batch, batch_data):
    batch, batch_indices, batch_weights = batch_data
    optimizer.zero_grad()
    loss_v, sample_prios = calc_loss_prio(
        batch, batch_weights, net, target_net.target_model,
        gamma=params.gamma**N_STEPS, device=device)
    loss_v.backward()
    optimizer.step()
    buffer.update_priorities(batch_indices, sample_prios)
    if engine_for_batch.state.iteration % params.target_net_sync == 0:
        target_net.sync()
    return {
        "loss": loss_v.item(),
        "beta": buffer.update_beta(engine_for_batch.state.iteration),
    }

engine = Engine(process_batch)
utils.setup_ignite(engine, params, exp_source, "04_DQN_Rainbow")
engine.run(utils.batch_generator(buffer, params.replay_initial,
                                 params.batch_size))
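One detail this fragment depends on but does not show: a discount of params.gamma**N_STEPS is only correct if the experience source emits n-step transitions, with the intermediate rewards already folded in. With ptan that is a single argument, assuming N_STEPS matches the exponent used in the loss:

exp_source = ptan.experience.ExperienceSourceFirstLast(
    env, agent, gamma=params.gamma, steps_count=N_STEPS)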
buffer = ptan.experience.ExperienceReplayBuffer(
    exp_source, buffer_size=params.replay_size)
optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)

def process_batch(engine_for_batch, batch):
    optimizer.zero_grad()
    loss_v = calc_loss(batch, net, target_net.target_model,
                       gamma=params.gamma, device=device)
    loss_v.backward()
    optimizer.step()
    epsilon_tracker.frame(engine_for_batch.state.iteration)
    if engine_for_batch.state.iteration % params.target_net_sync == 0:
        target_net.sync()
    return {
        "loss": loss_v.item(),
        "epsilon": selector.epsilon,
    }

engine = Engine(process_batch)
utils.setup_ignite(engine, params, exp_source, "07_DQN_Categorical")
engine.run(utils.batch_generator(buffer, params.replay_initial,
                                 params.batch_size))
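For the categorical (C51) version, calc_loss is a cross-entropy between the Bellman-projected target distribution and the distribution the network predicts for the taken actions. A sketch of that core, assuming atom logits of shape (batch, n_actions, N_ATOMS) and a precomputed projected target distribution proj_distr_v; the Bellman projection itself is omitted:

import torch.nn.functional as F

def categorical_ce_loss(logits_v, actions_v, proj_distr_v):
    # log-probabilities of the atoms for the actions actually taken
    log_sm_v = F.log_softmax(logits_v, dim=2)
    log_p_actions_v = log_sm_v[range(len(actions_v)), actions_v]
    # cross-entropy against the projected target distribution
    loss_v = -(proj_distr_v * log_p_actions_v).sum(dim=1)
    return loss_v.mean()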
    if engine_for_batch.state.iteration % EVAL_EVERY_FRAME == 0:
        eval_states = getattr(engine_for_batch.state, "eval_states", None)
        if eval_states is None:
            eval_states = buffer.sample(STATES_TO_EVALUATE)
            eval_states = [np.array(transition.state, copy=False)
                           for transition in eval_states]
            eval_states = np.array(eval_states, copy=False)
            engine_for_batch.state.eval_states = eval_states
        engine_for_batch.state.metrics["values"] = \
            utils.calc_values_of_states(eval_states, net, device)
    return {
        "loss": loss_v.item(),
        "epsilon": selector.epsilon,
    }

engine = Engine(process_batch)
utils.setup_ignite(engine, params, exp_source,
                   f"03_DQN_Double={args.double}",
                   extra_metrics=('values',))
engine.run(utils.batch_generator(buffer, params.replay_initial,
                                 params.batch_size))
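The part that makes this run "double" lives inside the loss function, not in this fragment: with double DQN the online net chooses the bootstrap action and the target net only evaluates it. A hedged sketch of that target computation; the function name and tensor arguments are illustrative, not the actual helper:

import torch

def calc_double_dqn_targets(net, tgt_net, next_states_v, rewards_v,
                            done_mask, gamma, double=True):
    with torch.no_grad():
        if double:
            # online net picks the greedy action...
            next_actions_v = net(next_states_v).max(1)[1]
            # ...target net supplies that action's value
            next_state_vals = tgt_net(next_states_v).gather(
                1, next_actions_v.unsqueeze(-1)).squeeze(-1)
        else:
            # plain DQN: target net both picks and evaluates
            next_state_vals = tgt_net(next_states_v).max(1)[0]
        next_state_vals[done_mask] = 0.0
    return next_state_vals * gamma + rewards_v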
buffer = ptan.experience.ExperienceReplayBuffer(
    exp_source, buffer_size=params.replay_size)
optimizer = optim.Adam(net.parameters(), lr=params.learning_rate)

def process_batch(engine_for_batch, batch):
    optimizer.zero_grad()
    loss_v = utils.calc_loss_dqn(batch, net, target_net.target_model,
                                 gamma=params.gamma**args.n, device=device)
    loss_v.backward()
    optimizer.step()
    epsilon_tracker.frame(engine_for_batch.state.iteration)
    if engine_for_batch.state.iteration % params.target_net_sync == 0:
        target_net.sync()
    return {
        "loss": loss_v.item(),
        "epsilon": selector.epsilon,
    }

engine = Engine(process_batch)
utils.setup_ignite(engine, params, exp_source, f"02_N_Step={args.n}")
engine.run(utils.batch_generator(buffer, params.replay_initial,
                                 params.batch_size))
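Why gamma**args.n: ExperienceSourceFirstLast with steps_count=args.n folds the n intermediate rewards into one discounted sum and returns the state n steps ahead, so the loss bootstraps with gamma raised to the n-th power. In miniature (n_step_reward is a toy helper for illustration, not part of ptan):

def n_step_reward(rewards, gamma):
    # fold the immediate rewards right-to-left into a discounted sum
    total = 0.0
    for r in reversed(rewards):
        total = r + gamma * total
    return total

# e.g. n=3, gamma=0.99: 1 + 0.99*1 + 0.99**2 * 1 = 2.9701
assert abs(n_step_reward([1.0, 1.0, 1.0], 0.99) - 2.9701) < 1e-9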
def process_batch(engine_for_batch, batch):
    optimizer.zero_grad()
    loss_v = utils.calc_loss_dqn(batch, net, target_net.target_model,
                                 gamma=params.gamma, device=device)
    loss_v.backward()
    optimizer.step()
    if engine_for_batch.state.iteration % params.target_net_sync == 0:
        target_net.sync()
    if engine_for_batch.state.iteration % NOISY_SNR_EVERY_ITERS == 0:
        for layer_idx, sigma_l2 in enumerate(net.noisy_layers_sigma_snr()):
            engine_for_batch.state.metrics[f'snr_{layer_idx + 1}'] = sigma_l2
    return {
        "loss": loss_v.item(),
    }

engine = Engine(process_batch)
utils.setup_ignite(engine, params, exp_source, "04_DQN_NoisyNetwork",
                   extra_metrics=('snr_1', 'snr_2'))
engine.run(utils.batch_generator(buffer, params.replay_initial,
                                 params.batch_size))
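net.noisy_layers_sigma_snr() is model-specific. A sketch of how such a method could be written, assuming the model keeps its NoisyLinear layers in self.noisy_layers and each layer exposes weight and sigma_weight parameters; the value reported is the RMS of the learned weights over the RMS of the noise scales, so a growing curve means the layer is suppressing its noise:

def noisy_layers_sigma_snr(self):
    # signal-to-noise ratio per noisy layer: RMS of the learned
    # weights divided by RMS of the learned noise scale sigma
    return [
        ((layer.weight ** 2).mean().sqrt() /
         (layer.sigma_weight ** 2).mean().sqrt()).item()
        for layer in self.noisy_layers
    ]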