    total_steps += 1
    print("  learning...")
    print(touch_count, " touches in episode ", i_episode)
    # update the policy from this episode's collected rewards
    finish_episode_learning(model, optimizer)

    if log_name:
      writer.add_scalar(log_name + "/reward", running_reward, total_steps)
      writer.add_scalar(log_name + "/touches", len(observed_touches), total_steps)
      writer.add_scalar(log_name + "/average_activated_pixels", np.mean(average_activated_pixels), total_steps)
    if i_episode % args.log_interval == 0:
      print('  Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format(i_episode, step, running_reward))
    if running_reward > 5000:  # env.spec.reward_threshold
      print("  Solved! Running reward is now {} and the last episode runs to {} time steps!".format(running_reward, step))
      break
    if model_path:
      env.mkdir_p(model_path)
      # the same weights are written under both checkpoint names
      torch.save(model.state_dict(), os.path.join(model_path, 'policy.pkl'))
      torch.save(model.state_dict(), os.path.join(model_path, 'cnn_lstm.pkl'))
  print("touched ", touched_episodes, " times")

elif args.mode == "test" or args.mode == "all":
  #test
  test_labels = []
  predicted_labels = []
  steps_to_guess = []
  correct = 0
  total_correct = 0
  total = 0
  touched_episodes = 0

  for i_episode in range(100):
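
The excerpt calls finish_episode_learning(model, optimizer) but does not show it. A minimal REINFORCE-style sketch of such a function, assuming the model accumulates per-step log-probabilities in model.saved_log_probs and rewards in model.rewards (hypothetical attribute names, patterned on the standard PyTorch policy-gradient example):

import torch

def finish_episode_learning(model, optimizer, gamma=0.99):
    # Discount the episode's rewards into returns, newest-to-oldest.
    R, returns = 0.0, []
    for r in reversed(model.rewards):        # assumed per-step reward buffer
        R = r + gamma * R
        returns.insert(0, R)
    returns = torch.tensor(returns)
    # Normalize returns for a lower-variance gradient estimate.
    returns = (returns - returns.mean()) / (returns.std() + 1e-8)

    # REINFORCE loss: -log pi(a_t|s_t) * G_t, summed over the episode.
    loss = torch.stack([-lp * g for lp, g in
                        zip(model.saved_log_probs, returns)]).sum()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Reset the per-episode buffers for the next rollout.
    del model.rewards[:]
    del model.saved_log_probs[:]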
Example #2
        if args.log:
            writer.add_scalar(args.log + "/reward", running_reward,
                              total_steps)
            writer.add_scalar(args.log + "/touches", len(observed_touches),
                              total_steps)
            writer.add_scalar(args.log + "/average_activated_pixels",
                              np.mean(average_activated_pixels), total_steps)
        if i_episode % args.log_interval == 0:
            print('  Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.
                  format(i_episode, step, running_reward))
        if running_reward > 5000:  # env.spec.reward_threshold
            print(
                "  Solved! Running reward is now {} and the last episode runs to {} time steps!"
                .format(running_reward, step))
            break
        if args.model_path:
            env.mkdir_p(args.model_path)
            torch.save(model.state_dict(),
                       os.path.join(args.model_path, 'policy.pkl'))
            torch.save(model.state_dict(),
                       os.path.join(args.model_path, 'cnn_lstm.pkl'))

elif args.mode == "test" or args.mode == "all":
    #test
    test_labels = []
    predicted_labels = []
    steps_to_guess = []
    correct = 0
    total_correct = 0
    total = 0

    for i_episode in range(100):
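
Both examples also rely on an env.mkdir_p helper before saving checkpoints. Assuming it simply mirrors the shell's mkdir -p, a minimal sketch:

import os

def mkdir_p(path):
    # Create `path` and any missing parents; do nothing if the directory
    # already exists, like the shell's `mkdir -p`.
    os.makedirs(path, exist_ok=True)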