Esempio n. 1
0
print('Using device %s' % device)

# Environment, replay buffer and the policy network that gets optimized.
game = CartPoleVisual(w, h, t)
memory = PrioritizedReplayMemory(memory_size)
policy_net = DuelingDQN(w, h, t, len(game.actions)).to(device)

# Bundle everything the training helpers operate on. The target network is a
# second, separately constructed DuelingDQN built with the same arguments.
model = LearningModel(
    game=game,
    memory=memory,
    policy_net=policy_net,
    target_net=DuelingDQN(w, h, t, len(game.actions)).to(device),
    optimizer=RMSprop(policy_net.parameters()),
    strategy_name='dueling_double_dq',
    device=device,
)
print('Model prepared')

# %%
# A falsy result from load_checkpoint means there is nothing to resume from,
# so the model is pretrained instead. On a successful load,
# play_and_remember_steps runs before training continues (presumably to
# refill the replay memory -- confirm against its definition).
if not load_checkpoint(model):
    pretrain(model, hyperparams)
    print('Pretraining finished')
else:
    play_and_remember_steps(model, hyperparams)
    print('Resuming completed')

# %%
train(model, hyperparams, 10)
save_checkpoint(model)

# %%
play_example(model.exec())
Esempio n. 2
0
                              target_net=target_net,
                              input_dtype=torch.half,
                              optimizer=optimizer,
                              strategy_name=strategy_name,
                              game_name=_game.name,
                              device=device,
                              status=TrainingStatus(summary_writer))
    print('Model prepared')

    # %%
    if load_checkpoint(model):
        print('Resuming')
    else:
        print('Starting fresh')

    # %%
    train(model,
          create_game,
          hyperparams,
          steps_to_train // hyperparams.game_steps_per_epoch,
          save_every=25 // episode_factor)
    save_checkpoint(model)

    with create_game() as game:
        print('Running validation')
        print_validation(model, game, 5)
    #   print('Playing example')
    #   play_example(model.exec(), game, 'final')

    print('Done.')
                                  game.actions)).to(device),
                              optimizer=Adam(policy_net.parameters(), lr=1e-4),
                              strategy_name='pdddqn',
                              game_name=game.name,
                              device=device)
    print('Model prepared')

    # %%
    if load_checkpoint(model):
        print('Resuming')
    else:
        print('Starting fresh')

    # %%
    if steps_to_train > 0:
        train(model,
              create_game,
              hyperparams,
              steps_to_train // episode_factor,
              save_every=25 // episode_factor,
              validation_episodes=5)
        save_checkpoint(model)

    with create_game() as game:
        print('Running validation')
        print_validation(model, game, 30)
        # print('Playing example')
        # play_example(model.exec(), game, 'final')

    print('Done.')
print('Using device %s' % device)

# A game instance is opened here only to read its action list and name while
# the model is assembled; `model` stays in use after the block exits.
with create_game() as _game:
  memory = SimpleReplayMemory(memory_size)
  policy_net = DQN(w, h, t, len(_game.actions)).to(device)

  model = LearningModel(memory=memory,
                        policy_net=policy_net,
                        target_net=DQN(w, h, t, len(_game.actions)).to(device),
                        optimizer=Adam(policy_net.parameters(), lr=1e-5),
                        game_name=_game.name,
                        strategy_name='30min',
                        device=device)
print('Model prepared')

# %%
# Only the reported message depends on whether a checkpoint was loaded.
print('Resuming' if load_checkpoint(model) else 'Starting fresh')

# %%
# The fourth argument is 500000 integer-divided by the configured
# game_steps_per_epoch.
train(
  model,
  create_game,
  hyperparams,
  500000 // hyperparams.game_steps_per_epoch,
  validation_episodes=0,
  save_every=25,
  example_every=0,
)
save_checkpoint(model)

with create_game() as game:
  print_validation(model, game, 20)
Esempio n. 5
0
def train_with(device: torch.device, steps_to_train: int,
               game_steps_per_step: int, prio_memory: bool):
    """Run one complete training experiment and validate the result.

    Builds the hyperparameters, a replay memory, two DQN_RBP networks and a
    LearningModel, then calls train(), save_checkpoint() and
    print_validation() in that order. This function never calls
    load_checkpoint().

    Args:
        device: Device both networks are moved to; also passed to the model.
        steps_to_train: Step budget; integer-divided by the configured
            game_steps_per_epoch to obtain the count handed to train().
        game_steps_per_step: Forwarded to the hyperparameters, multiplied by
            32 to obtain the batch size, and embedded in the strategy name.
        prio_memory: When true a PrioritizedReplayMemory is used and the
            strategy name ends in 'prm'; otherwise a SimpleReplayMemory is
            used and the name ends in 'srm'.
    """
    episode_factor = 5
    side = 84  # passed as both the width and the height network argument
    stack = 4  # third size argument of the networks
    capacity = 50000
    batches_each_game_step = 32

    hyperparams = TrainingHyperparameters(
        gamma=0.99,
        beta=linear_increase(0.01 * episode_factor),
        exploration_rate=linear_decay(
            0.008 * episode_factor, max_value=1., min_value=0.01),
        batch_size=game_steps_per_step * batches_each_game_step,
        game_steps_per_step=game_steps_per_step,
        copy_to_target_every=1000,
        game_steps_per_epoch=1000 * episode_factor,
        multi_step_n=4,
        warmup_rounds=500,
        init_memory_steps=1000,
        parallel_game_processes=2,
        max_batches_prefetch=10,
        states_on_device=True,
    )

    # The game is opened here only to read its action list and name.
    with create_game() as _game:
        suffix = 'prm' if prio_memory else 'srm'
        strategy_name = 'floaton-steps%d-%s' % (game_steps_per_step, suffix)

        memory_cls = (PrioritizedReplayMemory
                      if prio_memory else SimpleReplayMemory)
        memory = memory_cls(capacity)

        n_actions = len(_game.actions)
        policy_net = DQN_RBP(side, side, stack, n_actions).to(device)
        target_net = DQN_RBP(side, side, stack, n_actions).to(device)
        optimizer = Adam(policy_net.parameters(), lr=1e-4)

        # The timestamp gives every invocation its own log directory.
        run_dir = 'runs/%s-%s-%s' % (
            _game.name, strategy_name, datetime.now().isoformat())
        model = LearningModel(
            memory=memory,
            policy_net=policy_net,
            target_net=target_net,
            input_dtype=torch.float,
            optimizer=optimizer,
            strategy_name=strategy_name,
            game_name=_game.name,
            device=device,
            status=TrainingStatus(SummaryWriter(run_dir)),
        )
    print('%s: Model prepared' % strategy_name)

    # %%
    n_epochs = steps_to_train // hyperparams.game_steps_per_epoch
    train(model, create_game, hyperparams, n_epochs, save_every=0)
    save_checkpoint(model)

    # Validation runs on a freshly created game instance.
    with create_game() as validation_game:
        print('Running validation of', strategy_name)
        print_validation(model, validation_game, 5)
    print('%s completed' % strategy_name)
    target_net = DuelingDQN(w, h, t, len(_game.actions)).to(device)

    model = LearningModel(
      memory=memory,
      policy_net=policy_net,
      target_net=target_net,
      optimizer=Adam(policy_net.parameters(), lr=1e-4),
      strategy_name='mpdddq_d',
      game_name=_game.name,
      device=device
    )
  print('Model prepared')

  # %%
  if load_checkpoint(model):
    print('Resuming')
  else:
    print('Starting fresh')

  # %%
  train(model, create_game, hyperparams, 500 // episode_factor, save_every=25 // episode_factor)
  save_checkpoint(model)

  with create_game() as game:
    print('Running validation')
    print_validation(model, game, 5)
  #   print('Playing example')
  #   play_example(model.exec(), game, 'final')

  print('Done.')