import argparse
import os
import time

import numpy as np
import torch as th

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy, BoltzmannGumbelQPolicy, SoftmaxPolicy, LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory
from rl.callbacks import TestLogger, ModelIntervalCheckpoint, TrainEpisodeLogger, Callback

from models import LordTateKanti

from envs import halite_env
from envs.attraction_env import AttractionEnv
from envs.command_env import CommandEnv
from envs.metrics_env import MetricsEnv
from envs.tensorboard_callback import TensorBoard

bot_name = 'PlanetCaptureSmartBot'

env = halite_env.Env()
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", replay=False, bot_name=bot_name)
env = MetricsEnv(env)
env = CommandEnv(env)
nb_actions = env.action_space.n

model = LordTateKanti.make_model(env)
model.summary()

# parameters


nb_steps = 20_000
nb_steps_warmup = int(nb_steps * 0.01)
memory = SequentialMemory(limit=10_000, window_length=1)
policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0, value_min=0.01, value_test=0.05, nb_steps=int(nb_steps * 0.66))
parser = argparse.ArgumentParser()
parser.add_argument('--use-stdio',
                    action='store_true',
                    help='Use stdio for game')
args = parser.parse_args()


def ifprint(*args_, **kwargs):
    if not args.use_stdio:
        print(*args_, **kwargs)


bot_name = 'AttractionBotOBL'

env = halite_env.Env(stdio=args.use_stdio)
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", bot_name=bot_name)
env = AttractionEnv(env)

current_directory = os.path.dirname(os.path.abspath(__file__))
model_location = os.path.join(current_directory, f'ppo_{bot_name}_model')
model = PPO.load(model_location)

total_reward = 0
reward = 0
observations = env.reset()
while True:
    actions = model.predict(observations)[0]

    ifprint(f'{actions=}')
# Beispiel #3 (listing-site marker from the original scrape; vote count: 0)
from envs import halite_env
from envs.attraction_env import AttractionEnv
from envs.metrics_env import MetricsEnv

from models import Illogical

bot_name = 'AttractionBotOBL'
weights_name = f'sac_{bot_name}_model'

parser = argparse.ArgumentParser()
parser.add_argument('--load', action='store_true', help='Continue')
args = parser.parse_args()

env = halite_env.Env(
    enemy_bot=
    '/home/vova/Documents/multiagent_systems/L3/bots/venv3.8/bin/python /home/vova/Documents/multiagent_systems/L3/bots/Covid-chance/MyBot-v0.1.1-alpha.py'
)
env.configure(socket_path=f"/dev/shm/{time.time_ns()}", bot_name=bot_name)
env = AttractionEnv(env)
env = Monitor(env)
n_actions = env.action_space.shape[-1]
param_noise = None
action_noise = NormalActionNoise(mean=np.zeros(n_actions),
                                 sigma=0.2 * np.ones(n_actions))

policy_kwargs = dict(activation_fn=th.nn.Sigmoid, net_arch=[500, 500, 200])
model = SAC('MlpPolicy',
            env,
            policy_kwargs=policy_kwargs,
            verbose=1,
            tensorboard_log=f'./runs/{time.time()}/')