예제 #1
0
# Restore the saved network weights from the checkpoint file.
# map_location="cpu" lets a checkpoint that was written on a CUDA device be
# loaded on a CPU-only host; the network below is constructed without any
# device move, so load_state_dict copies the values into its parameters
# regardless of where the checkpoint tensors were originally stored.
checkpoint = torch.load("checkpoint_max.tar", map_location="cpu")

agent = DeepSnakeNetwork(16, 3)
agent.load_state_dict(checkpoint["model_state_dict"])

# NOTE(review): the network is switched to eval mode even though an optimizer
# and a loss function are set up right below — confirm whether train mode
# was intended for the loop that follows.
agent.eval()

optimizer = optim.SGD(agent.parameters(), lr=1e-2)
criterion = F.smooth_l1_loss
epsilon = agent.initial_epsilon
# Presumably the multiplicative per-step decay applied to epsilon — verify
# against the loop body.
decay = 0.999996

replay_memory = []
game_state = GameState()
# Only the first element returned by initial_state() is used as the starting
# observation; it is converted to a float32 tensor for the network.
state = torch.tensor(game_state.initial_state()[0], dtype=torch.float32)

# Loss bookkeeping used by the loop below.
losses = []
loss_counter = 0
epoch_loss = 0

max_score, old_score = 0, 0

for epoch in range(agent.number_of_iterations):
    check_exit()
    clock.tick(10)
    output = agent(state)

    action = select_action(output)

    new_state, new_reward, is_state_terminal, score = game_state.frame_step(