def main():
    env = GameEnv(show=False)
    agent = DQNAgent(env=env)
    no_episodes = 500

    for episode in prog_bar(range(no_episodes), ascii=False, unit="episodes"):
        state = env.reset().reshape(1, 2)

        while True:
            action = agent.predict_action(state)
            new_state, reward, done = env.action(action)

            new_state = new_state.reshape(1, 2)

            # store the transition, then update the online and target networks
            agent.remember(state, action, reward, new_state, done)
            agent.model_train()
            agent.target_train()

            state = new_state

            if done:
                if reward == 500:  # this environment signals a successful episode with a reward of 500
                    print(f"Completed in episode {episode}")
                    agent.save_model(f"final-{episode}.model")

                break
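The snippet above omits its imports and entry point. A minimal guess at the surrounding boilerplate follows; the module names are assumptions inferred from the calls, not taken from the source:

# Assumed boilerplate for Example #1; GameEnv, DQNAgent and prog_bar are
# project-local, so these module names are guesses.
from env import GameEnv
from agent import DQNAgent
from tqdm import tqdm as prog_bar  # prog_bar is used like tqdm above

if __name__ == "__main__":
    main()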
Example #2
def run(arguments) -> None:
    # Create the environment
    env = GameEnv()
    agent1 = DeepQNetwork.restore(arguments["TRAINED_MODEL_1"])
    agent2 = DeepQNetwork.restore(arguments["TRAINED_MODEL_2"])

    # Test the agent that was trained
    for e_test in range(TEST_Episodes):
        state = env.reset()
        state = np.reshape(state, [1, agent1.nS])
        tot_reward1 = 0
        tot_reward2 = 0

        if WRITE_VIDEO and e_test == 0:
            fig = plt.figure()
            frames = []

        for t_test in range(1000):
            if SHOW_GAME:
                show_game(env.render_env(), t_test, tot_reward1, tot_reward2)
            if WRITE_VIDEO and e_test == 0:
                temp = env.render_env()
                frames.append([
                    plt.text(0, -1, "Time: " + str(t_test), fontsize=8),
                    plt.text(7,
                             -1,
                             "Total reward - player 1: " + str(tot_reward1) +
                             ",  player 2: " + str(tot_reward2),
                             fontsize=8),
                    plt.imshow(temp, animated=True)
                ])

            agent1_action = agent1.test_action(state)
            agent2_action = agent2.test_action(state)
            reward1, reward2 = env.move(agent1_action, agent2_action)
            nstate = tf.reshape(env.contribute_metrix(), [-1])
            nstate = np.reshape(nstate, [1, agent1.nS])
            tot_reward1 += reward1
            tot_reward2 += reward2

            #DON'T STORE ANYTHING DURING TESTING
            state = nstate
            if t_test == 999:
                print("episode: {}/{}, scores: {}, {}".format(
                    e_test, TEST_Episodes, tot_reward1, tot_reward2))
                break

        if WRITE_VIDEO and e_test == 0:
            Writer = matplotlib.animation.writers['ffmpeg']
            writer = Writer(fps=15, metadata=dict(artist='Me'), bitrate=1800)
            ani = matplotlib.animation.ArtistAnimation(fig,
                                                       frames,
                                                       interval=20,
                                                       blit=True)
            ani.save('movies/' + arguments["TRAINED_MODEL_1"].split('/')[-1] +
                     '_test_2players.mp4',
                     writer=writer)
            print('Video saved.')
Example #3
from stable_baselines import DQN
import stable_baselines
from env import GameEnv
from stable_baselines.a2c.utils import conv, linear, conv_to_fc
from stable_baselines.deepq.policies import FeedForwardPolicy
import tensorflow as tf
import numpy as np


def modified_cnn(scaled_images, **kwargs):
    activ = tf.nn.relu
    layer_1 = activ(conv(scaled_images, 'c1', n_filters=64, filter_size=2, stride=1,  **kwargs))
    layer_2 = activ(conv(layer_1, 'c2', n_filters=128, filter_size=2, stride=1,  **kwargs))
    layer_3 = activ(conv(layer_2, 'c3', n_filters=256, filter_size=2, stride=1,  **kwargs))
    layer_3 = conv_to_fc(layer_3)
    return activ(linear(layer_3, 'fc1', n_hidden=512, init_scale=np.sqrt(2)))


class CustomPolicy(FeedForwardPolicy):

    def __init__(self, *args, **kwargs):
        super(CustomPolicy, self).__init__(*args, **kwargs, cnn_extractor=modified_cnn, feature_extraction="cnn")
# env = stable_baselines.common.vec_env.DummyVecEnv([lambda: GameEnv()])


env = GameEnv()
model = DQN(CustomPolicy, env, verbose=1).learn(total_timesteps=int(6e7))
model.save('model')
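Once trained and saved, the model can be reloaded for evaluation with the standard stable_baselines API. The loop below is a minimal sketch; it assumes GameEnv exposes the usual gym reset/step interface, which DQN.learn already requires:

# Sketch: reload the saved policy and run one greedy episode.
loaded_model = DQN.load('model')
obs = env.reset()
done = False
while not done:
    action, _states = loaded_model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)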
Example #4
#!/usr/bin/env python3
# encoding=utf-8

import matplotlib.pyplot as plt
import matplotlib.animation
import numpy as np

from env import GameEnv

env = GameEnv()
env.reset()

temp = env.render_env()
i = 0
fig = plt.figure()
frames = []
while i < 100:
    temp = env.render_env()
    frames.append([plt.text(0, -1, "Time: " + str(i), fontsize=8),
                   plt.imshow(temp, animated=True)])
    # plt.imshow(temp)
    # plt.text(3, -1, f'reward 1: {r1}, reward 2: {r2}', fontsize=8)
    # plt.show(block=False)
    # plt.pause(0.01)
    action1 = np.random.randint(8)
    action2 = np.random.randint(8)

    r1, r2 = env.move(action1, action2)
    i += 1
    if r1 or r2:
        print(i, 'r1:', r1, 'r2:', r2)
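The frames collected above are never displayed or saved in this snippet. Since matplotlib.animation is already imported and fig/frames exist, one way to finish the script is the same ArtistAnimation pattern used in Example #2 (a sketch; the output filename is made up):

# Sketch: assemble the collected frames into a short video of the random play.
ani = matplotlib.animation.ArtistAnimation(fig, frames, interval=50, blit=True)
ani.save('random_play.mp4', writer=matplotlib.animation.FFMpegWriter(fps=15))
# plt.show()  # or view interactively instead of saving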
Example #5
import numpy as np
import torch
import torch.optim as optim
import torch.nn.functional as F
import matplotlib
import matplotlib.pyplot as plt

from env import GameEnv
from rewards import Reward1
from trainers.dqn import DQNTrainer
from networks import DQN

is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display
plt.ion()

env = GameEnv(reward_class=Reward1)

# define transformation function


# 4*4 matrix
def get_state(self: DQNTrainer):
    # represent each tile by its log2 exponent; empty tiles stay 0
    with np.errstate(divide='ignore'):
        board = np.where(self.env.board != 0, np.log2(self.env.board), 0)
        return torch.from_numpy(
            np.ascontiguousarray(board)).unsqueeze(0).float().to(self.device)


# 4*16 matrix
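The listing breaks off before defining the 4*16 variant. Purely as an illustration (the exact layout meant by "4*16" is not shown in the source), a one-hot version of the transform could look like this:

# Hypothetical sketch, not from the source: one-hot encode each cell's
# log2 exponent instead of feeding the raw exponent values.
def get_state_one_hot(self: DQNTrainer):
    with np.errstate(divide='ignore'):
        exponents = np.where(self.env.board != 0,
                             np.log2(self.env.board), 0).astype(np.int64)
    one_hot = np.eye(16, dtype=np.float32)[exponents.flatten()]  # (16 cells, 16 classes)
    return torch.from_numpy(one_hot).unsqueeze(0).to(self.device)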

Example #6
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.cem import CEMAgent
from rl.memory import EpisodeParameterMemory

from env import GameEnv

ENV_NAME = 'CartPole-v0'  # not referenced below

# Get the environment and extract the number of actions.
env = GameEnv(shape=(5, 5))

nb_actions = env.action_space.n

# Option 2: deep network
model = Sequential()
model.add(Flatten(input_shape=(1, ) + env.observation_space.shape))
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dense(256))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('softmax'))
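The snippet imports CEMAgent and EpisodeParameterMemory but stops before using them. A minimal sketch of how they are typically wired up in keras-rl follows; the hyperparameters are illustrative, not taken from the source:

# Sketch (assumed, following the usual keras-rl CEM setup):
memory = EpisodeParameterMemory(limit=1000, window_length=1)
cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory,
               batch_size=50, nb_steps_warmup=2000, train_interval=50,
               elite_frac=0.05)
cem.compile()
cem.fit(env, nb_steps=100000, visualize=False, verbose=2)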
Example #7
from env import GameEnv
import numpy as np

env = GameEnv(type="q_table")
done = False

no_episodes = 500000  # number of rounds to train the AI
learning_rate = 0.2  # between 0 and 1; higher values learn faster (training freezes here if set too high)
discount = 0.95  # discount factor: how strongly future rewards are weighted relative to immediate ones
epsilon = 0.9  # how much we want to explore the environment
epsilon_decay = 0.9998  # decrease exploration each step (multiplied into epsilon)
show_every = 100  # how often to render the progress visually

state = env.reset()

q_table = {}
for a in range(10):
    for b in range(10):
        q_table[(a, b)] = [np.random.uniform(-5, 0)
                           for i in range(4)]  # number of actions

for episode in range(no_episodes):
    while True:
        if np.random.random() > epsilon:
            # will be false in the beginning but will diminish towards the end
            action = np.argmax(q_table[state])
        else:
            action = np.random.randint(0, 4)

        new_state, reward, done = env.action(action)
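        # The listing is cut off here; the remainder of the loop is not shown in
        # the source. A standard tabular Q-learning update using the variables
        # defined above would look roughly like this (sketch, not the source's code):
        if not done:
            max_future_q = np.max(q_table[new_state])
            current_q = q_table[state][action]
            q_table[state][action] = current_q + learning_rate * (
                reward + discount * max_future_q - current_q)

        state = new_state
        epsilon *= epsilon_decay  # decay exploration, as described above

        if done:
            state = env.reset()
            break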