parser.add_argument('--batch_size', type=int, default=4, metavar='N')
parser.add_argument('--num_steps', type=int, default=1000001, metavar='N')
parser.add_argument('--hidden_size', type=int, default=512, metavar='N')
parser.add_argument('--updates_per_step', type=int, default=1, metavar='N')
parser.add_argument('--start_steps', type=int, default=10000, metavar='N')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')
args = parser.parse_args()
args.cuda = torch.cuda.is_available()

env = h_env.HockeyEnv(mode=h_env.HockeyEnv.NORMAL)

# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)
actor = ('full_player_models/sac_actor_hockey_11200_batch_size-4_gamma-0.95_'
         'tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_'
         'updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000')
critic = ('full_player_models/sac_critic_hockey_11200_batch_size-4_gamma-0.95_'
          'tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_'
          'updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000')
agent.load_model(actor, critic)

# opponent = copy.deepcopy(agent)
basic_strong = h_env.BasicOpponent(weak=False)

time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Tensorboard
writer = SummaryWriter(
    f"strongplay-runs/ERE{time_}_batch_size-{args.batch_size}_gamma-{args.gamma}_"
    f"tau-{args.tau}_lr-{args.lr}_alpha-{args.alpha}_"
    f"tuning-{args.automatic_entropy_tuning}_hidden_size-{args.hidden_size}_"
    f"updatesStep-{args.updates_per_step}_startSteps-{args.start_steps}_"
    f"targetIntervall-{args.target_update_interval}_replaysize-{args.replay_size}")

# Memory
memory = ERE_PrioritizedReplay(args.replay_size)
# memory = ReplayMemory(args.replay_size, args.seed)

# Training loop
total_numsteps = 0
updates = 0
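# The ERE_PrioritizedReplay buffer above combines prioritized replay with
# Emphasizing Recent Experience (Wang & Ross, 2019). As a minimal sketch of
# the ERE idea, the k-th of K updates in an episode samples only from the
# c_k most recent transitions; the schedule below follows the paper, and the
# names ere_range, eta and c_min are illustrative, not taken from this
# codebase:
def ere_range(buffer_len, k, K, eta=0.996, c_min=5000):
    """Number of most-recent transitions that update k (of K) may sample from."""
    c_k = int(buffer_len * eta ** (k * 1000 / K))
    return max(min(c_k, buffer_len), min(c_min, buffer_len))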
args = parser.parse_args()
args.cuda = torch.cuda.is_available()

# Environment
# env = NormalizedActions(gym.make(args.env_name))
env = gym.make(args.env_name)
env.seed(args.seed)
env.action_space.seed(args.seed)
torch.manual_seed(args.seed)
np.random.seed(args.seed)

# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)

# Tensorboard
writer = SummaryWriter('runs/{}_SAC_{}_{}_{}'.format(
    datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S"),
    args.env_name, args.policy,
    "autotune" if args.automatic_entropy_tuning else ""))

# Memory
memory = PrioritizedReplay(args.replay_size)

# Training loop
total_numsteps = 0
updates = 0

for i_episode in itertools.count(1):
    episode_reward = 0
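    # The rest of the loop body is not part of this excerpt. A minimal sketch
    # of how a SAC training loop of this shape typically continues; the method
    # names select_action, update_parameters and memory.push follow common SAC
    # reference implementations and are assumptions here:
    episode_steps = 0
    done = False
    state = env.reset()
    while not done:
        if args.start_steps > total_numsteps:
            action = env.action_space.sample()   # warm-up: uniform random actions
        else:
            action = agent.select_action(state)  # sample from the current policy
        if len(memory) > args.batch_size:
            for _ in range(args.updates_per_step):
                agent.update_parameters(memory, args.batch_size, updates)
                updates += 1
        next_state, reward, done, _ = env.step(action)
        episode_steps += 1
        total_numsteps += 1
        episode_reward += reward
        memory.push(state, action, reward, next_state, float(not done))
        state = next_state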
parser.add_argument('--batch_size', type=int, default=8, metavar='N')
parser.add_argument('--num_steps', type=int, default=1000001, metavar='N')
parser.add_argument('--hidden_size', type=int, default=512, metavar='N')
parser.add_argument('--updates_per_step', type=int, default=1, metavar='N')
parser.add_argument('--start_steps', type=int, default=10000, metavar='N')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')
args = parser.parse_args()
args.cuda = torch.cuda.is_available()

env = h_env.HockeyEnv(mode=h_env.HockeyEnv.NORMAL)

# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)
actor = ('full_player_models/sac_actor_hockey_11200_batch_size-4_gamma-0.95_'
         'tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_'
         'updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000')
critic = ('full_player_models/sac_critic_hockey_11200_batch_size-4_gamma-0.95_'
          'tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_'
          'updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000')
agent.load_model(actor, critic)

# opponent = copy.deepcopy(agent)
opponent = h_env.BasicOpponent(weak=False)

time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

# Tensorboard

# Memory
memory = PrioritizedReplay(args.replay_size)
# memory = ReplayMemory(args.replay_size, args.seed)

# Training loop
total_numsteps = 0
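# In HockeyEnv both players act within a single step: player 2 is queried on
# its mirrored observation and the two actions are concatenated before
# env.step. A minimal sketch of one interaction step against the strong
# BasicOpponent; obs_agent_two() and BasicOpponent.act() are part of the
# hockey-env API, while select_action follows the SAC usage assumed above:
obs = env.reset()
obs_agent2 = env.obs_agent_two()
a1 = agent.select_action(obs)    # SAC agent controls player 1
a2 = opponent.act(obs_agent2)    # basic opponent controls player 2
obs, reward, done, info = env.step(np.hstack([a1, a2]))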
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')
args = parser.parse_args()
args.cuda = torch.cuda.is_available()

env = h_env.HockeyEnv(mode=h_env.HockeyEnv.NORMAL)

# Agent
root = 'finals/'
runs = sorted(os.listdir(root))
runs = [r + '/' for r in runs]

agent = SAC(env.observation_space.shape[0], env.action_space, args)
opponent = SAC(env.observation_space.shape[0], env.action_space, args)

player1 = 1
player2 = 3
basic1 = False
basic2 = False
print(f"{runs[player1]} vs {runs[player2]}")

models1 = sorted(os.listdir(root + runs[player1]))
actor = root + runs[player1] + models1[0]
critic = root + runs[player1] + models1[1]
target = root + runs[player1] + models1[2] if len(models1) == 3 else None

models2 = sorted(os.listdir(root + runs[player2]))
o_actor = root + runs[player2] + models2[0]
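# The listing breaks off above. For illustration only, a hypothetical helper
# showing how two loaded SAC agents could then be matched against each other;
# play_match is invented for this sketch, and info['winner'] follows the
# hockey-env convention (1 = player 1 wins, -1 = player 2 wins, 0 = draw):
def play_match(env, player_a, player_b, n_games=100):
    """Play n_games and return player_a's win rate."""
    wins = 0
    for _ in range(n_games):
        obs = env.reset()
        done = False
        info = {}
        while not done:
            a1 = player_a.select_action(obs)
            a2 = player_b.select_action(env.obs_agent_two())
            obs, reward, done, info = env.step(np.hstack([a1, a2]))
        wins += int(info.get('winner', 0) == 1)
    return wins / n_games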