Example #1
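# --- Assumed preamble (not part of the original snippet): the imports and the
# --- parser construction that the add_argument calls below rely on. The import
# --- paths of the project-local modules are guesses, and the argparse defaults
# --- are taken from the hyper-parameters encoded in the checkpoint filenames
# --- used further down. The same setup is implied by Examples #2-#4.
import argparse
import datetime
import itertools
import os                                                     # used in Example #2

import numpy as np
import torch
from torch.utils.tensorboard import SummaryWriter

import laserhockey.hockey_env as h_env                        # hockey environment (import path is a guess)
from sac import SAC                                           # project-local SAC agent (path is a guess)
from replay_memory import ReplayMemory                        # project-local replay buffers (paths are guesses)
from prioritized_replay import PrioritizedReplay, ERE_PrioritizedReplay

parser = argparse.ArgumentParser(description='SAC on the hockey environment')
parser.add_argument('--seed', type=int, default=123456, metavar='N')          # default is a guess
parser.add_argument('--batch_size', type=int, default=4, metavar='N')
parser.add_argument('--gamma', type=float, default=0.95, metavar='G')
parser.add_argument('--tau', type=float, default=0.005, metavar='G')
parser.add_argument('--lr', type=float, default=0.0003, metavar='G')
parser.add_argument('--alpha', type=float, default=0.2, metavar='G')
parser.add_argument('--automatic_entropy_tuning', default=True)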
parser.add_argument('--num_steps', type=int, default=1000001, metavar='N')
parser.add_argument('--hidden_size', type=int, default=512, metavar='N')
parser.add_argument('--updates_per_step', type=int, default=1, metavar='N')
parser.add_argument('--start_steps', type=int, default=10000, metavar='N')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')

args = parser.parse_args()

args.cuda = torch.cuda.is_available()


env = h_env.HockeyEnv(mode=h_env.HockeyEnv.NORMAL)
# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)
actor = 'full_player_models/sac_actor_hockey_11200_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000'
critic = 'full_player_models/sac_critic_hockey_11200_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000'
agent.load_model(actor, critic)
# opponent = copy.deepcopy(agent)
basic_strong = h_env.BasicOpponent(weak=False)
time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Tensorboard
writer = SummaryWriter(f"strongplay-runs/ERE{time_}_batch_size-{args.batch_size}_gamma-{args.gamma}_tau-{args.tau}_lr-{args.lr}_alpha-{args.alpha}_tuning-{args.automatic_entropy_tuning}_hidden_size-{args.hidden_size}_updatesStep-{args.updates_per_step}_startSteps-{args.start_steps}_targetIntervall-{args.target_update_interval}_replaysize-{args.replay_size}")

# Memory
memory = ERE_PrioritizedReplay(args.replay_size)
# memory = ReplayMemory(args.replay_size,args.seed)

# Training Loop
total_numsteps = 0
updates = 0
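# --- What follows is a hedged sketch of the loop those counters feed into, not
# --- code from the original script. The method names (agent.select_action,
# --- agent.update_parameters, the replay-buffer interface, env.obs_agent_two,
# --- basic_strong.act) and the concatenated two-player action are assumptions
# --- based on common SAC / hockey-env conventions.
for i_episode in itertools.count(1):
    state = env.reset()
    episode_reward, episode_steps, done = 0.0, 0, False
    while not done:
        if total_numsteps < args.start_steps:
            action = env.action_space.sample()   # pure exploration at first; assumes a single player's 4 controls
        else:
            action = agent.select_action(state)  # sample from the current policy (assumed method)
        if len(memory) > args.batch_size:
            for _ in range(args.updates_per_step):
                agent.update_parameters(memory, args.batch_size, updates)
                updates += 1
        opponent_action = basic_strong.act(env.obs_agent_two())
        next_state, reward, done, _ = env.step(np.hstack([action, opponent_action]))
        memory.push(state, action, reward, next_state, float(not done))
        episode_steps += 1
        total_numsteps += 1
        episode_reward += reward
        state = next_state
    writer.add_scalar('reward/train', episode_reward, i_episode)
    if total_numsteps > args.num_steps:
        break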

Example #2
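# This snippet starts mid-script: the checkpoint directory `root`, the list of run
# folders `runs`, the index `player1`, the env, `args`, and the two SAC instances
# `agent` and `opponent` are assumed to have been set up above, as in Example #1.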
player2 = 3
basic1 = False
basic2 = False
print(f"{runs[player1]} vs {runs[player2]}")
models1 = sorted(os.listdir(root + runs[player1]))
actor = root + runs[player1] + models1[0]
critic = root + runs[player1] + models1[1]
target = root + runs[player1] + models1[2] if len(models1) == 3 else None

models2 = sorted(os.listdir(root + runs[player2]))
o_actor = root + runs[player2] + models2[0]
o_critic = root + runs[player2] + models2[1]
o_target = root + runs[player2] + models2[2] if len(models2) == 3 else None

agent.load_model(actor, critic, target)
opponent.load_model(o_actor, o_critic, o_target)

basic = h_env.BasicOpponent(weak=False)

time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Tensorboard
# writer = SummaryWriter(f"hockey-runs-defence/{time_}_batch_size-{args.batch_size}_gamma-{args.gamma}_tau-{args.tau}_lr-{args.lr}_alpha-{args.alpha}_tuning-{args.automatic_entropy_tuning}_hidden_size-{args.hidden_size}_updatesStep-{args.updates_per_step}_startSteps-{args.start_steps}_targetIntervall-{args.target_update_interval}_replaysize-{args.replay_size}")

# Memory
# memory = PrioritizedReplay(args.replay_size)
memory = ReplayMemory(args.replay_size, args.seed)

# Training Loop
total_numsteps = 0
updates = 0
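# --- Hedged sketch of a head-to-head evaluation between the two loaded agents;
# --- not part of the original snippet. The evaluate flag of select_action,
# --- env.obs_agent_two() and info['winner'] are assumed interfaces.
wins = draws = defeats = 0
for _ in range(100):
    state, done, info = env.reset(), False, {}
    while not done:
        a1 = agent.select_action(state, evaluate=True)                 # deterministic action (assumed flag)
        a2 = opponent.select_action(env.obs_agent_two(), evaluate=True)
        state, reward, done, info = env.step(np.hstack([a1, a2]))
    outcome = info.get('winner', 0)                                    # 1 win, 0 draw, -1 defeat (assumed key)
    wins += outcome == 1
    draws += outcome == 0
    defeats += outcome == -1
print(f"wins: {wins}  draws: {draws}  defeats: {defeats}")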
Example #3
parser.add_argument('--start_steps', type=int, default=10000, metavar='N')
parser.add_argument('--target_update_interval', type=int, default=1, metavar='N')
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')

args = parser.parse_args()

args.cuda = torch.cuda.is_available()


env = h_env.HockeyEnv(mode=h_env.HockeyEnv.NORMAL)
# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)
actor = "full_player_models/sac_actor_hockey_11200_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000"
critic = "full_player_models/sac_critic_hockey_11200_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-256_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000"

agent.load_model(actor, critic)
# opponent = copy.deepcopy(agent)
opponent = h_env.BasicOpponent(weak=False)
time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Tensorboard

# Memory
memory = PrioritizedReplay(args.replay_size)
# memory = ReplayMemory(args.replay_size,args.seed)

# Training Loop
total_numsteps = 0
updates = 0

o = env.reset()
# _ = env.render()
Example #4
parser.add_argument('--replay_size', type=int, default=1000000, metavar='N')

args = parser.parse_args()

args.cuda = torch.cuda.is_available()

env = h_env.HockeyEnv(mode=h_env.HockeyEnv.TRAIN_DEFENSE)
# Agent
agent = SAC(env.observation_space.shape[0], env.action_space, args)
# actor512 = 'hockey-hidden-models-attack/sac_actor_hockey_reward-8.385833864540086_episode-41000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-512_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-40-41'
# critic512 = 'hockey-hidden-models-attack/sac_critic_hockey_reward-8.385833864540086_episode-41000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-512_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-40-41'
# actor128 = 'hockey-hidden-models-attack/sac_actor_hockey_reward-8.184820100545167_episode-39000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-128_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-36-16'
# critic128 = 'hockey-hidden-models-attack/sac_critic_hockey_reward-8.184820100545167_episode-39000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-128_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-36-16'
actor64 = 'hockey-hidden-models-attack/sac_actor_hockey_reward-8.407677291229737_episode-33000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-64_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-36-10'
critic64 = 'hockey-hidden-models-attack/sac_critic_hockey_reward-8.407677291229737_episode-33000_batch_size-4_gamma-0.95_tau-0.005_lr-0.0003_alpha-0.2_tuning-True_hidden_size-64_updatesStep-1_startSteps-10000_targetIntervall-1_replaysize-1000000_t-2021-03-10_22-36-10'
agent.load_model(actor64, critic64)
time_ = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
# Tensorboard
writer = SummaryWriter(
    f"hockey-hidden-runs-defence/{time_}_batch_size-{args.batch_size}_gamma-{args.gamma}_tau-{args.tau}_lr-{args.lr}_alpha-{args.alpha}_tuning-{args.automatic_entropy_tuning}_hidden_size-{args.hidden_size}_updatesStep-{args.updates_per_step}_startSteps-{args.start_steps}_targetIntervall-{args.target_update_interval}_replaysize-{args.replay_size}"
)

# Memory
memory = PrioritizedReplay(args.replay_size)
# memory = ReplayMemory(args.replay_size,args.seed)

# Training Loop
total_numsteps = 0
updates = 0

o = env.reset()
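# --- Hedged sketch (not from the original script): watch the loaded 64-unit
# --- attack policy on the defence task. The evaluate flag of select_action, the
# --- 4-dim per-player action and the zero-filled opponent slot are assumptions.
for _ in range(5):
    o, done = env.reset(), False
    while not done:
        env.render()
        a1 = agent.select_action(o, evaluate=True)
        o, r, done, info = env.step(np.hstack([a1, np.zeros(4)]))
env.close()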