total_steps +=1 print(" learning...") print(touch_count, " touchs in episode ", i_episode) finish_episode_learning(model, optimizer) if log_name: writer.add_scalar(log_name+"/reward",running_reward,total_steps) writer.add_scalar(log_name+"/touches",len(observed_touches),total_steps) writer.add_scalar(log_name+"/average_activated_pixels",np.mean(average_activated_pixels),total_steps) if i_episode % args.log_interval == 0: print(' Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'.format(i_episode, step, running_reward)) if running_reward > 5000: #env.spec.reward_threshold: print(" Solved! Running reward is now {} and the last episode runs to {} time steps!".format(running_reward, t)) break if model_path: env.mkdir_p(model_path) torch.save(model.state_dict(), os.path.join(model_path, 'policy.pkl' )) torch.save(model.state_dict(), os.path.join(model_path, 'cnn_lstm.pkl' )) print("touched ", touched_episodes, " times") elif args.mode == "test" or args.mode == "all": #test test_labels = [] predicted_labels = [] steps_to_guess = [] correct = 0 total_correct = 0 total = 0 touched_episodes = 0 for i_episode in range(100):
writer.add_scalar(args.log + "/reward", running_reward, total_steps) writer.add_scalar(args.log + "/touches", len(observed_touches), total_steps) writer.add_scalar(args.log + "/average_activated_pixels", np.mean(average_activated_pixels), total_steps) if i_episode % args.log_interval == 0: print(' Episode {}\tLast length: {:5d}\tAverage length: {:.2f}'. format(i_episode, step, running_reward)) if running_reward > 5000: #env.spec.reward_threshold: print( " Solved! Running reward is now {} and the last episode runs to {} time steps!" .format(running_reward, t)) break if args.model_path: env.mkdir_p(args.model_path) torch.save(model.state_dict(), os.path.join(args.model_path, 'policy.pkl')) torch.save(model.state_dict(), os.path.join(args.model_path, 'cnn_lstm.pkl')) elif args.mode == "test" or args.mode == "all": #test test_labels = [] predicted_labels = [] steps_to_guess = [] correct = 0 total_correct = 0 total = 0 for i_episode in range(100):