# DQN training setup for the PlanetCaptureSmartBot Halite agent.
import time  # FIX: time.time_ns() is used below but `time` was never imported

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, BoltzmannGumbelQPolicy, SoftmaxPolicy, LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import TestLogger, ModelIntervalCheckpoint, TrainEpisodeLogger, Callback

from models import LordTateKanti
from envs import halite_env
from envs.command_env import CommandEnv
from envs.metrics_env import MetricsEnv
from envs.tensorboard_callback import TensorBoard

bot_name = 'PlanetCaptureSmartBot'

# Environment stack: raw Halite env -> metrics wrapper -> command wrapper.
env = halite_env.Env()
# Unique socket path per run so concurrent training runs do not collide.
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", replay=False, bot_name=bot_name)
env = MetricsEnv(env)
env = CommandEnv(env)
nb_actions = env.action_space.n

model = LordTateKanti.make_model(env)
model.summary()

# training hyper-parameters
nb_steps = 20_000
nb_steps_warmup = int(nb_steps * 0.01)  # 1% of total steps before learning starts
memory = SequentialMemory(limit=10_000, window_length=1)
# Epsilon-greedy exploration, annealed linearly from 1.0 down to 0.01
# over the first 66% of training; fixed eps=0.05 at test time.
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0, value_min=0.01,
                              value_test=0.05, nb_steps=int(nb_steps * 0.66))
# Inference script: load a trained PPO model and drive the AttractionBotOBL agent.
import argparse  # FIX: argparse/os/time are used below but were never imported
import os
import time

# NOTE(review): `halite_env`, `AttractionEnv` and `PPO` are assumed to be
# imported elsewhere in the file — they are not in scope in this span; confirm.

parser = argparse.ArgumentParser()
parser.add_argument('--use-stdio', action='store_true', help='Use stdio for game')
args = parser.parse_args()


def ifprint(*args_, **kwargs):
    """Print only when NOT using stdio (stdout then belongs to the game engine)."""
    if not args.use_stdio:
        print(*args_, **kwargs)


bot_name = 'AttractionBotOBL'

env = halite_env.Env(stdio=args.use_stdio)
# Unique socket path per run so concurrent runs do not collide.
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", bot_name=bot_name)
env = AttractionEnv(env)

# Load the saved PPO weights that live next to this script.
current_directory = os.path.dirname(os.path.abspath(__file__))
model_location = os.path.join(current_directory, f'ppo_{bot_name}_model')
model = PPO.load(model_location)

total_reward = 0
reward = 0
observations = env.reset()
# NOTE(review): this loop appears truncated in the visible source — it never
# calls env.step() or terminates; confirm against the full file.
while True:
    actions = model.predict(observations)[0]  # predict() returns (actions, state)
    ifprint(f'{actions=}')
# SAC training setup for the AttractionBotOBL agent against a fixed enemy bot.
import argparse  # FIX: argparse/time/np/th are used below but were never imported
import time

import numpy as np
import torch as th

from envs import halite_env
from envs.attraction_env import AttractionEnv
from envs.metrics_env import MetricsEnv
from models import Illogical

# NOTE(review): `Monitor`, `NormalActionNoise` and `SAC` (stable-baselines3)
# are assumed to be imported elsewhere in the file — confirm.

bot_name = 'AttractionBotOBL'
weights_name = f'sac_{bot_name}_model'  # NOTE(review): unused in the visible code

parser = argparse.ArgumentParser()
parser.add_argument('--load', action='store_true', help='Continue')
args = parser.parse_args()

# Play against a fixed scripted opponent bot.
env = halite_env.Env(
    enemy_bot='/home/vova/Documents/multiagent_systems/L3/bots/venv3.8/bin/python /home/vova/Documents/multiagent_systems/L3/bots/Covid-chance/MyBot-v0.1.1-alpha.py'
)
# Unique socket path per run so concurrent training runs do not collide.
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", bot_name=bot_name)
env = AttractionEnv(env)
env = Monitor(env)

n_actions = env.action_space.shape[-1]
param_noise = None
# Gaussian exploration noise with sigma = 0.2 on every action dimension.
action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.2 * np.ones(n_actions))

policy_kwargs = dict(activation_fn=th.nn.Sigmoid, net_arch=[500, 500, 200])
model = SAC('MlpPolicy', env, policy_kwargs=policy_kwargs, verbose=1,
            tensorboard_log=f'./runs/{time.time()}/')