Python LearningModel Examples

Programming Language: Python

Namespace/Package Name: drl.deepq.learn

Class/Type: LearningModel

Examples at hotexamples.com: 5

Python LearningModel - 5 examples found. These are the top-rated real-world Python examples of drl.deepq.learn.LearningModel, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

LearningModel(5)

Frequently Used Methods

LearningModel (5)

Example #1

Show file

        # This name is used twice below: in the SummaryWriter path and as the
        # model's strategy_name.
        strategy_name = 'pdddq_rbp_half'
        memory = PrioritizedReplayMemory(memory_size)
        # Policy and target networks are built with identical arguments, moved
        # to `device`, and converted to half precision via .half().
        policy_net = DuelingDQN_RBP(w, h, t,
                                    len(_game.actions)).to(device).half()
        target_net = DuelingDQN_RBP(w, h, t,
                                    len(_game.actions)).to(device).half()
        # Only the policy network's parameters are handed to the optimizer.
        optimizer = Adam(policy_net.parameters(), lr=1e-5)

        # The SummaryWriter path combines game name, strategy name and the
        # current local time in ISO format.
        summary_writer = SummaryWriter(
            'runs/%s-%s-%s' %
            (_game.name, strategy_name, datetime.now().isoformat()))
        # input_dtype=torch.half is passed alongside the .half() conversion of
        # both networks above.
        model = LearningModel(memory=memory,
                              policy_net=policy_net,
                              target_net=target_net,
                              input_dtype=torch.half,
                              optimizer=optimizer,
                              strategy_name=strategy_name,
                              game_name=_game.name,
                              device=device,
                              status=TrainingStatus(summary_writer))
    print('Model prepared')

    # %%
    # The return value of load_checkpoint selects the message: a truthy result
    # is reported as a resumed run, a falsy one as a fresh start.
    if load_checkpoint(model):
        print('Resuming')
    else:
        print('Starting fresh')

    # %%
    train(model,
          create_game,

Example #2

Show file

File: pong-30.py Project: msiegenthaler/deep-reinforced-learning-course

def create_game():
  """Return a new Pong30Min instance constructed from the module-level w and h."""
  game = Pong30Min(w, h)
  return game


# Use the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda:0")
else:
  device = torch.device("cpu")
print('Using device %s' % device)

# The game is opened only while the model is assembled: its action list and
# name are read here. `model` stays bound after the block exits.
with create_game() as _game:
  memory = SimpleReplayMemory(memory_size)
  action_count = len(_game.actions)
  # Policy network first, then target network, both built with the same arguments.
  policy_net = DQN(w, h, t, action_count).to(device)
  target_net = DQN(w, h, t, action_count).to(device)
  # Only the policy network's parameters are handed to the optimizer.
  optimizer = Adam(policy_net.parameters(), lr=1e-5)

  model = LearningModel(
    memory=memory,
    policy_net=policy_net,
    target_net=target_net,
    optimizer=optimizer,
    game_name=_game.name,
    strategy_name='30min',
    device=device,
  )
print('Model prepared')

# %%
# A truthy result from load_checkpoint is reported as a resumed run.
print('Resuming' if load_checkpoint(model) else 'Starting fresh')

# %%
# 500000 divided (integer division) by the epoch length from hyperparams.
epoch_count = 500000 // hyperparams.game_steps_per_epoch
train(
  model,
  create_game,
  hyperparams,
  epoch_count,
  validation_episodes=0,
  save_every=25,
  example_every=0,
)

Example #3

Show file

File: pong.py Project: msiegenthaler/deep-reinforced-learning-course

  print('Using device %s' % device)

  def create_game():
    """Return a new Pong instance constructed from w, h and t of the enclosing scope."""
    game = Pong(w, h, t)
    return game

  # The game is opened only while the model is assembled; the block reads
  # _game.actions and _game.name from it.
  with create_game() as _game:
    # Alternative replay memory, currently disabled:
    # memory = PrioritizedReplayMemory(memory_size)
    memory = SimpleReplayMemory(memory_size)
    # Policy and target networks are constructed with identical arguments.
    policy_net = DuelingDQN(w, h, t, len(_game.actions)).to(device)
    target_net = DuelingDQN(w, h, t, len(_game.actions)).to(device)

    # Only the policy network's parameters are passed to the optimizer.
    model = LearningModel(
      memory=memory,
      policy_net=policy_net,
      target_net=target_net,
      optimizer=Adam(policy_net.parameters(), lr=1e-4),
      strategy_name='mpdddq_d',
      game_name=_game.name,
      device=device
    )
  print('Model prepared')

  # %%
  # The return value of load_checkpoint selects the message: a truthy result
  # is reported as a resumed run, a falsy one as a fresh start.
  if load_checkpoint(model):
    print('Resuming')
  else:
    print('Starting fresh')

  # %%
  # Both the fourth argument and save_every are scaled down by episode_factor
  # using integer division.
  # NOTE(review): save_every is presumably the checkpoint interval - confirm
  # against train().
  train(model, create_game, hyperparams, 500 // episode_factor, save_every=25 // episode_factor)
  # Explicit save once train() has returned.
  save_checkpoint(model)

Example #4

Show file

def train_with(device: torch.device, steps_to_train: int,
               game_steps_per_step: int, prio_memory: bool):
    """Build a DQN_RBP model, train it, save it, and print a validation run.

    No checkpoint is loaded in this function; the model is saved once after
    ``train`` returns.

    Args:
        device: Device both networks are moved to; also passed to the model.
        steps_to_train: Divided (integer division) by the configured
            ``game_steps_per_epoch`` to get the fourth argument of ``train``.
        game_steps_per_step: Forwarded as the hyperparameter of the same
            name; also multiplies the batch size and appears in the strategy
            name.
        prio_memory: When truthy a ``PrioritizedReplayMemory`` is used,
            otherwise a ``SimpleReplayMemory``.
    """
    episode_factor = 5
    w = h = 84
    t = 4
    memory_size = 50000
    batch_per_game_step = 32
    batch_size = game_steps_per_step * batch_per_game_step

    # Schedules are created first, in the same order as they are passed below.
    beta_schedule = linear_increase(0.01 * episode_factor)
    exploration_schedule = linear_decay(0.008 * episode_factor,
                                        max_value=1.,
                                        min_value=0.01)
    hyperparams = TrainingHyperparameters(
        gamma=0.99,
        beta=beta_schedule,
        exploration_rate=exploration_schedule,
        batch_size=batch_size,
        game_steps_per_step=game_steps_per_step,
        copy_to_target_every=1000,
        game_steps_per_epoch=1000 * episode_factor,
        multi_step_n=4,
        warmup_rounds=500,
        init_memory_steps=1000,
        parallel_game_processes=2,
        max_batches_prefetch=10,
        states_on_device=True)

    # The game is opened only while the model is assembled: its action list
    # and name are read here.
    with create_game() as probe_game:
        memory_tag = 'prm' if prio_memory else 'srm'
        strategy_name = 'floaton-steps%d-%s' % (game_steps_per_step,
                                                memory_tag)
        memory_cls = (PrioritizedReplayMemory
                      if prio_memory else SimpleReplayMemory)
        replay_memory = memory_cls(memory_size)

        action_count = len(probe_game.actions)
        # Policy network first, then target network, with identical arguments.
        policy_net = DQN_RBP(w, h, t, action_count).to(device)
        target_net = DQN_RBP(w, h, t, action_count).to(device)
        # Only the policy network's parameters are handed to the optimizer.
        optimizer = Adam(policy_net.parameters(), lr=1e-4)

        # Path built from game name, strategy name and the current local time.
        run_dir = 'runs/%s-%s-%s' % (probe_game.name, strategy_name,
                                     datetime.now().isoformat())
        writer = SummaryWriter(run_dir)
        model = LearningModel(memory=replay_memory,
                              policy_net=policy_net,
                              target_net=target_net,
                              input_dtype=torch.float,
                              optimizer=optimizer,
                              strategy_name=strategy_name,
                              game_name=probe_game.name,
                              device=device,
                              status=TrainingStatus(writer))
    print('%s: Model prepared' % strategy_name)

    # %%
    epoch_count = steps_to_train // hyperparams.game_steps_per_epoch
    train(model, create_game, hyperparams, epoch_count, save_every=0)
    save_checkpoint(model)

    # A separate game instance is opened for the validation printout.
    with create_game() as validation_game:
        print('Running validation of', strategy_name)
        print_validation(model, validation_game, 5)
    print('%s completed' % strategy_name)

Example #5

Show file

File: pong-2.py Project: msiegenthaler/deep-reinforced-learning-course

  # Use the first CUDA device when one is available, otherwise run on the CPU.
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  print('Using device %s' % device)

  # The game is opened only while the model is assembled; the block reads
  # _game.actions and _game.name from it.
  with create_game() as _game:
    # This name is used twice below: in the SummaryWriter path and as the
    # model's strategy_name.
    strategy_name = 'pdddq_rbp'
    memory = PrioritizedReplayMemory(memory_size)
    # Policy and target networks are constructed with identical arguments.
    policy_net = DuelingDQN_RBP(w, h, t, len(_game.actions)).to(device)
    target_net = DuelingDQN_RBP(w, h, t, len(_game.actions)).to(device)

    # The SummaryWriter path combines game name, strategy name and the current
    # local time in ISO format.
    summary_writer = SummaryWriter('runs/%s-%s-%s' % (_game.name, strategy_name, datetime.now().isoformat()))
    # Only the policy network's parameters are passed to the optimizer.
    model = LearningModel(
      memory=memory,
      policy_net=policy_net,
      target_net=target_net,
      optimizer=Adam(policy_net.parameters(), lr=1e-4),
      strategy_name=strategy_name,
      game_name=_game.name,
      device=device,
      status=TrainingStatus(summary_writer)
    )
  print('Model prepared')

  # %%
  # The return value of load_checkpoint selects the message: a truthy result
  # is reported as a resumed run, a falsy one as a fresh start.
  if load_checkpoint(model):
    print('Resuming')
  else:
    print('Starting fresh')

  # %%
  # The fourth argument is steps_to_train divided (integer division) by the
  # epoch length from hyperparams; save_every is 25 integer-divided by
  # episode_factor.
  train(model, create_game, hyperparams, steps_to_train // hyperparams.game_steps_per_epoch,
        save_every=25 // episode_factor)