def setup_env(env_id: str, monitor_dir: str = None) -> gym.Env:
    """
    Make and environment and set it up with wrappers.

    Args:
        env_id: the id for the environment to load
        output_dir: the output directory to route monitor output to

    Returns:
        a loaded and wrapped Open AI Gym environment

    """
    if 'Tetris' in env_id:
        import gym_tetris
        env = gym_tetris.make(env_id)
        env = gym_tetris.wrap(env, clip_rewards=False)
    elif 'SuperMarioBros' in env_id:
        import gym_super_mario_bros
        env = gym_super_mario_bros.make(env_id)
        env = BinarySpaceToDiscreteSpaceEnv(env, SIMPLE_MOVEMENT)
        env = nes_py_wrap(env)
    else:
        env = build_atari_environment(env_id)

    if monitor_dir is not None:
        env = gym.wrappers.Monitor(env, monitor_dir, force=True)

    return env
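
A minimal usage sketch for setup_env; it assumes gym, gym_tetris, and the wrapper helpers imported above are available, and the monitor directory name is illustrative:

env = setup_env('TetrisA-v0', monitor_dir='./monitor')  # hypothetical output directory
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()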
Example #2
    def __init__(self, env, seed, max_episode_length, action_repeat,
                 bit_depth, args):
        from nes_py.wrappers import JoypadSpace
        import gym_tetris
        from gym_tetris.actions import SIMPLE_MOVEMENT

        self._env = gym_tetris.make(env, skip_level=True)
        self._env.seed(seed)
        self._env = JoypadSpace(self._env, SIMPLE_MOVEMENT)
        self.max_episode_length = max_episode_length
        self.action_repeat = action_repeat
        self.bit_depth = bit_depth
        self.small_image = args.small_image
        self.add_reward = args.add_reward
        self.typeb = "1" in env
        self.acc = 0.03 if self.typeb else 3
        self.living = 0.003 if self.typeb else 0.3
        self.dim = 1 if args.binary_image else 3
        if args.binary_image:
            self._process_obs = _images_to_observation_binary
        else:
            self._process_obs = _images_to_observation
        self.one_skip = False
        if not args.add_reward:
            self.acc = 0
            self.living = 0
Example #3
    def play(self):
        env = gym_tetris.make('TetrisA-v0')
        env = JoypadSpace(env, MOVEMENT)
        state = env.reset()
        model = self.global_model
        model_path = os.path.join(self.save_dir,
                                  'model_{}.h5'.format('Tetris'))
        print('Loading model from: {}'.format(model_path))
        model.load_weights(model_path)
        done = False
        step_counter = 0
        reward_sum = 0
        pieza_colocada = True
        informacion = env.get_info()
        antiguo_statistics = informacion['statistics']
        state = [0, 0, 0, 0]
        while not done:
            env.render()
            if pieza_colocada:
                pieza_colocada = False
                pos = 5
                giro = 0
                u = -1
                state = [state]
                policy, value = model(
                    tf.convert_to_tensor(state, dtype=tf.float32))
                policy = tf.nn.softmax(policy)
                action = np.argmax(policy)
                pos_objetivo = action % 10
                giro_objetivo = action // 10
            if giro < giro_objetivo and not done:  # rotate until the target orientation is reached
                state, reward, done, info = env.step(1)
                accion = 0
                giro = giro + 1
            elif pos > pos_objetivo and not done:
                state, reward, done, info = env.step(6)
                pos = pos - 1
                accion = 0
            elif pos < pos_objetivo and not done:
                state, reward, done, info = env.step(3)
                pos = pos + 1
                accion = 0
            elif not done and not pieza_colocada:
                state, reward, done, info = env.step(9)
                accion = 9
            else:
                accion = 0
            if not done:
                state, reward, done, info = env.step(accion)
            env.render()
            informacion = env.get_info()
            if antiguo_statistics != informacion['statistics']:
                antiguo_statistics = informacion['statistics']
                step_counter += 1

        env.close()
Example #4
    def __init__(self):

        self.save_dir = '/'

        env = gym_tetris.make('TetrisA-v0')
        self.state_size = 4
        self.action_size = 40
        self.opt = tf.train.AdamOptimizer(0.001, use_locking=True)
        print(self.state_size, self.action_size)

        self.global_model = ActorCriticModel(
            self.state_size, self.action_size)  # global network
        self.global_model(
            tf.convert_to_tensor(np.random.random((1, self.state_size)),
                                 dtype=tf.float32))
Example #5
    def work(self):

        self.env = gym_tetris.make('TetrisA-v0')
        self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)

        self.env.reset()
        action = np.argmax(self.env.action_space.sample())
        ob, _, _, _ = self.env.step(int(action))

        inx = 10
        iny = 20
        done = False

        # net = neat.nn.FeedForwardNetwork.create(self.genome, self.config)
        net = neat.nn.recurrent.RecurrentNetwork.create(
            self.genome, self.config)
        fitness = 0
        xpos = 0
        xpos_max = 16
        counter = 0
        max_score = 0
        moving = 0
        frames = 0

        while not done:
            scaledimg = cv2.cvtColor(ob, cv2.COLOR_BGR2RGB)
            ob = Minimize(ob)
            ob = cv2.resize(ob, (10, 20))

            cv2.imshow('humanwin', scaledimg)
            cv2.waitKey(1)

            imgarray = np.ndarray.flatten(ob)

            actions = net.activate(imgarray)
            action = np.argmax(actions)
            ob, rew, done, info = self.env.step(int(action))

            frames += 1
            if frames == 1200:
                fitness += 1
                frames = 0

        print(
            f"genome:{self.genome.key} Fitnes: {fitness} lines: {info['number_of_lines']}"
        )

        return int(fitness)
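
If work is used as a NEAT fitness evaluator, it is typically called from an eval function passed to neat-python's Population.run; a hedged sketch, assuming Worker can be constructed from a genome and a config (that constructor is not shown in the listing):

def eval_genomes(genomes, config):
    for genome_id, genome in genomes:
        genome.fitness = Worker(genome, config).work()  # assumed constructor signature

# population = neat.Population(config)
# winner = population.run(eval_genomes, 50)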
Example #6
def main():

    print("Creating environment...")
    environment = gym_tetris.make('Tetris-v0')

    print("Creating model...")
    model = modelutils.create_model(number_of_actions)
    model.summary()

    print("Creating agent...")
    if agent_type == "dqn":
        agent = DQNAgent(
            name="tetris-dqn",
            environment=environment,
            model=model,
            observation_transformation=utils.resize_and_bgr2gray,
            observation_frames=4,
            number_of_iterations=1000000,
            gamma=0.95,
            final_epsilon=0.01,
            initial_epsilon=1.0,
            replay_memory_size=2000,
            minibatch_size=32
        )
    elif agent_type == "ddqn":
        agent = DDQNAgent(
            name="tetris-ddqn",
            environment=environment,
            model=model,
            observation_transformation=utils.resize_and_bgr2gray,
            observation_frames=4,
            number_of_iterations=1000000,
            gamma=0.95,
            final_epsilon=0.01,
            initial_epsilon=1.0,
            replay_memory_size=2000,
            minibatch_size=32,
            model_copy_interval=100
        )
    agent.enable_rewards_tracking(rewards_running_means_length=10000)
    agent.enable_episodes_tracking(episodes_running_means_length=100)
    agent.enable_maxq_tracking(maxq_running_means_length=10000)
    agent.enable_model_saving(model_save_frequency=10000)
    agent.enable_plots_saving(plots_save_frequency=10000)

    print("Training ...")
    agent.fit(verbose=True, headless="headless" in sys.argv, render_states=True)
Example #7
def main():
    print("Loading model...")
    if len(sys.argv) == 1:
        model_paths = glob.glob(os.path.join("pretrained_models", "*.h5"))
        model_paths = [model_path for model_path in model_paths if "tetris" in model_path]
        assert len(model_paths) != 0, "Did not find any models."
        model_paths = sorted(model_paths)
        model_path = model_paths[-1]
    else:
        model_path = sys.argv[1]
    model = models.load_model(model_path)
    print(model_path, "loaded.")

    print("Creating environment...")
    environment = gym_tetris.make('Tetris-v0')

    print("Running ...")
    run(model, environment, verbose="verbose" in sys.argv)
Example #8
def make_env(cfg):
    """Helper function to create dm_control environment"""
    if cfg.env == 'ball_in_cup_catch':
        domain_name = 'ball_in_cup'
        task_name = 'catch'
    elif cfg.env == 'point_mass_easy':
        domain_name = 'point_mass'
        task_name = 'easy'
    else:
        domain_name = cfg.env.split('_')[0]
        task_name = '_'.join(cfg.env.split('_')[1:])

    # per dreamer: https://github.com/danijar/dreamer/blob/02f0210f5991c7710826ca7881f19c64a012290c/wrappers.py#L26
    camera_id = 2 if domain_name == 'quadruped' else 0

#     env = dmc2gym.make(domain_name=domain_name,
#                        task_name=task_name,
#                        seed=cfg.seed,
#                        visualize_reward=False,
#                        from_pixels=True,
#                        height=cfg.image_size,
#                        width=cfg.image_size,
#                        frame_skip=cfg.action_repeat,
#                        camera_id=camera_id)
    # env = gym.make("CarRacing-v0")
    env_ = gym_tetris.make('TetrisA-v0')
    env = JoypadSpace(env_, SIMPLE_MOVEMENT)
    # env = MaxAndSkipEnv(env)
    # env._max_episode_steps = env_._max_episode_steps
    max_episode_steps = 10000
    env = WrapPyTorch(env, max_episode_steps)
    env.seed(cfg.seed)
    # print(env.ram)
    obs = env.reset()
    print(obs.shape)
    # env.seed(cfg.seed)

    env = utils.FrameStack(env, k=cfg.frame_stack)
    print("Init env done")
    # assert env.action_space.low.min() >= -1
    # assert env.action_space.high.max() <= 1

    return env
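
A hedged usage sketch for make_env; the cfg object is illustrative and only mirrors the attribute names the function reads (env, seed, image_size, action_repeat, frame_stack):

from types import SimpleNamespace

cfg = SimpleNamespace(env='tetris', seed=0, image_size=84,
                      action_repeat=2, frame_stack=3)
env = make_env(cfg)
obs = env.reset()
print(obs.shape)  # stacked, PyTorch-ordered observation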
Example #9
    def rank(self):
        for model in self.models:
            env = gym_tetris.make('TetrisA-v0')
            env = JoypadSpace(env, SIMPLE_MOVEMENT)
            env.reset()
            done = False
            info = None

            while not done:
                # Generate all options
                options = [[Action.DROP]]
                for x in range(1, 5):
                    left_option = [Action.LEFT] * x
                    left_option.append(Action.DROP)
                    options.append(left_option)
                    right_option = [Action.RIGHT] * x
                    right_option.append(Action.DROP)
                    options.append(right_option)

                # Enumerate all choices
                boards = []
                for option in options:
                    # Back-up the environment first
                    env.unwrapped._backup()

                    # Run the sequence of actions
                    state = None
                    for action in option:
                        state, _, _, _ = env.step(action.value)

                    # Now, parse the board from the state
                    board = parse_blocks(state)
                    boards.append(board)
                    env.unwrapped._restore()

                # Choose the best option genetically
                choice = model.best(boards)
                for action in options[choice]:
                    _, _, done, info = env.step(action.value)

            model.fitness = info['score']

        self.models = sorted(self.models, key=lambda model: model.fitness)
Example #10
 def __init__(self, env, symbolic, seed, max_episode_length, action_repeat,
              bit_depth, args):
     self.symbolic = symbolic
     # self._env = gym.make(env)
     self._env = gym_tetris.make(env, skip_level=True)
     self._env.seed(seed)
     self._env = JoypadSpace(self._env, SIMPLE_MOVEMENT)
     if symbolic:
         self._env = SymbolTetris(self._env)
     self.max_episode_length = max_episode_length
     self.action_repeat = 1
     self.bit_depth = bit_depth
     self.typeb = "1" in env
     self.acc = 0.01 if self.typeb else 3
     self.living = 0.01 if self.typeb else 0.3
     self.die = -50
     self.score = 0.0
     self.add_reward = args.add_reward
     if not args.add_reward:
         self.acc = 0
         self.living = 0
         self.die = 0
Example #11
 def __init__(self,
              state_size,
              action_size,
              global_model,
              opt,
              result_queue,
              idx,
              game_name='Tetris',
              save_dir='/tmp'):
     super(Worker, self).__init__()
     self.state_size = state_size
     self.action_size = action_size
     self.result_queue = result_queue
     self.global_model = global_model
     self.opt = opt
     self.local_model = ActorCriticModel(self.state_size, self.action_size)
     self.worker_idx = idx
     self.env = gym_tetris.make('TetrisA-v0')
     self.env = JoypadSpace(self.env, MOVEMENT)
     self.save_dir = save_dir
     self.ep_loss = 0.0
     self.game_name = 'Tetris'
Example #12
    def __init__(self, env_id, seed, max_episode_length=1000):
        super(GameEnv, self).__init__()
        extra_args = ENV_GAMES_ARGS.get(env_id, {})
        self.env_id = env_id
        if env_id == "TetrisA-v2":
            self._env = JoypadSpace(gym_tetris.make(env_id, **extra_args), SIMPLE_MOVEMENT)
        elif "ple" in env_id:
            self._env = gym_ple.make(env_id, **extra_args)
        elif "MiniGrid" in env_id:
            # self._env = AbsoluteActionGrid(FullyObsWrapper(gym.make(env_id)))
            self._env = AbsoluteActionGrid(RGBImgObsWrapper(gym.make(env_id)))
        elif "Sokoban" in env_id:
            self._env = TinySokoban(gym.make(env_id, **extra_args))
        elif "MazeEnv" in env_id:
            self._env = MazeEnvImage(mazenv.Env(mazenv.prim((8, 8))), randomize=True)
        else:
            self._env = gym.make(env_id, **extra_args)

        self._env.seed(seed)
        self.action_repeat = GAME_ENVS_ACTION_REPEATS.get(env_id, 1)
        self.max_episode_length = max_episode_length * self.action_repeat
        self.t = 0
Example #13
    def _thunk():
        env = gym_tetris.make(env_id)

        is_atari = hasattr(gym.envs, 'atari') and isinstance(
            env.unwrapped, gym.envs.atari.atari_env.AtariEnv)
        if is_atari:
            env = make_atari(env_id)

        env.seed(seed + rank)

        obs_shape = env.observation_space.shape

        if is_atari:
            if len(env.observation_space.shape) == 3:
                env = wrap_deepmind(env)
        elif len(env.observation_space.shape) == 3:
            env = JoypadSpace(env, SIMPLE_MOVEMENT)

        # If the input has shape (W,H,3), wrap for PyTorch convolutions
        obs_shape = env.observation_space.shape
        if len(obs_shape) == 3 and obs_shape[2] in [1, 3]:
            env = TransposeImage(env)

        return env
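
In code like this, _thunk is usually the return value of a per-worker factory so each copy gets its own seed; a minimal sketch of that pattern (the factory name make_env_fn and the four-copy list are assumptions, not part of the listing):

def make_env_fn(env_id, seed, rank):
    def _thunk():
        env = gym_tetris.make(env_id)
        env = JoypadSpace(env, SIMPLE_MOVEMENT)
        env.seed(seed + rank)
        return env
    return _thunk

# e.g. four differently seeded constructors for a vectorized runner
env_fns = [make_env_fn('TetrisA-v0', seed=0, rank=i) for i in range(4)]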
Example #14
from nes_py.wrappers import JoypadSpace
import gym_tetris
from gym_tetris.actions import SIMPLE_MOVEMENT
import time

import numpy as np
import matplotlib.pyplot as plt

import cv2
import keyboard

win_name = 'Tetris'

env = gym_tetris.make('TetrisA-v0')
env = JoypadSpace(env, SIMPLE_MOVEMENT)

done = True


def get_keyboard_action():
    while True:
        # x = '0123456789'
        # x = [i for i in x if keyboard.is_pressed(i)]
        # x = [*x, '0']
        # return int(x[0])

        if keyboard.is_pressed('a'):
            return 1
        if keyboard.is_pressed('d'):
            return 2
        return 0
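
The listing ends before the play loop; a minimal sketch of how get_keyboard_action could drive the environment set up above (the frame-rate sleep is an assumption):

while True:
    if done:
        state = env.reset()
    action = get_keyboard_action()
    state, reward, done, info = env.step(action)
    env.render()
    time.sleep(1 / 60)  # roughly NES frame pacing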
Example #15
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt

from nes_py.wrappers import JoypadSpace
import gym_tetris
import gym
from gym_tetris.actions import SIMPLE_MOVEMENT

env = gym_tetris.make("TetrisA-v2")
env = JoypadSpace(env, SIMPLE_MOVEMENT)

# action size is the NES SIMPLE_MOVEMENT input set for Tetris
# raw state size is (240, 256, 3)
num_actions = len(SIMPLE_MOVEMENT)
state_size = 214

episode = 0
running = True
isTrained = False
state = env.reset()


def getBoard(state):
    state = state[48:208, 96:176]  # crop to the Tetris board only (not the whole screen)
    state = np.mean(state, -1)

    # sample the middle pixel of each 8x8 cell
    board = np.zeros((20, 10))
    for line in range(4, len(state), 8):
        for col in range(4, state.shape[1], 8):
            # assumption: a non-black centre pixel marks the cell as occupied
            if state[line, col] > 0:
                board[line // 8, col // 8] = 1
    return board
Example #16
 def env_creator(env_config):
     env = gym_tetris.make(version)
     env = JoypadSpace(env, MOVEMENT)
     env = WarpFrame(env, dim=84)
     return env
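
env_creator has the single-argument signature Ray RLlib expects for custom environments; a hedged registration sketch (the "tetris" name is illustrative, and version, MOVEMENT, and WarpFrame are assumed to come from the enclosing scope):

from ray.tune.registry import register_env

register_env("tetris", env_creator)
# the trainer config can then reference "env": "tetris"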
Example #17
def dqn():
    env = gym_tetris.make('TetrisA-v2')
    env = JoypadSpace(env, MOVEMENT)
    episodes = 2000
    max_steps = None
    epsilon_stop_episode = 1500
    mem_size = 20000
    discount = 0.95
    batch_size = 512
    epochs = 1
    render_every = 50
    log_every = 50
    replay_start_size = 2000
    train_every = 1
    n_neurons = [32, 32]
    render_delay = None
    activations = ['relu', 'relu', 'linear']

    # note: get_state_size, get_next_states, play, and get_game_score below assume a
    # custom Tetris wrapper exposing board-level helpers on top of the raw gym_tetris env
    agent = DQNAgent(env.get_state_size(),
                     n_neurons=n_neurons,
                     activations=activations,
                     epsilon_stop_episode=epsilon_stop_episode,
                     mem_size=mem_size,
                     discount=discount,
                     replay_start_size=replay_start_size)

    log_dir = f'logs/tetris-nn={str(n_neurons)}-mem={mem_size}-bs={batch_size}-e={epochs}-{datetime.now().strftime("%Y%m%d-%H%M%S")}'
    log = CustomTensorBoard(log_dir=log_dir)

    scores = []

    for episode in tqdm(range(episodes)):
        current_state = env.reset()
        done = False
        steps = 0

        if render_every and episode % render_every == 0:
            render = True
        else:
            render = False

        # Game
        while not done and (not max_steps or steps < max_steps):
            next_states = env.get_next_states()
            best_state = agent.best_state(next_states.values())

            best_action = None
            for action, state in next_states.items():
                if state == best_state:
                    best_action = action
                    break

            reward, done = env.play(best_action[0],
                                    best_action[1],
                                    render=render,
                                    render_delay=render_delay)

            agent.add_to_memory(current_state, next_states[best_action],
                                reward, done)
            current_state = next_states[best_action]
            steps += 1

        scores.append(env.get_game_score())

        # Train
        if episode % train_every == 0:
            agent.train(batch_size=batch_size, epochs=epochs)

        # Logs
        if log_every and episode and episode % log_every == 0:
            avg_score = mean(scores[-log_every:])
            min_score = min(scores[-log_every:])
            max_score = max(scores[-log_every:])

            log.log(episode,
                    avg_score=avg_score,
                    min_score=min_score,
                    max_score=max_score)