Code Example #1
File: retro_wrappers.py Project: MrGoogol/baselines
def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    import retro
    if state is None:
        state = retro.State.DEFAULT
    env = retro.make(game, state, **kwargs)
    env = StochasticFrameSkip(env, n=4, stickprob=0.25)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
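A minimal usage sketch (not part of the original file; the game name is only an illustrative assumption and requires gym-retro plus the corresponding ROM to be imported):

# Hypothetical call to the helper above: builds the env with sticky frame skip and a time limit
env = make_retro(game='Airstriker-Genesis')
obs = env.reset()
obs, rew, done, info = env.step(env.action_space.sample())  # one random step through the wrappers
env.close()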
Code Example #2
def make_env(num_steps, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
    env = gym.wrappers.TimeLimit(env, max_episode_steps=num_steps)
    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)
    if scale_rew:
        env = RewardScaler(env, REWARD_RATE)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, NUM_STATES)
    return env
Code Example #3
    # catching other keys
    if keys[K_c]:
        buttons[8] = 1
    if keys[K_y]:
        buttons[9] = 1
    if keys[K_x]:
        buttons[10] = 1
    if keys[K_z]:
        buttons[11] = 1

    return buttons


pygame.init()
env = retro.make('SonicTheHedgehog-Genesis', 'GreenHillZone.Act1')
screen = pygame.display.set_mode(video_size)
env.reset()

done = False
clock = pygame.time.Clock()

unique_filename = str(datetime.datetime.now().date()) + '_' + str(
    datetime.datetime.now().time()).replace(':', '-')

#Use append mode ('a') to create the file if it doesn't exist; if it already exists, open it and add to it
with open('sonic-data-' + unique_filename + '.csv', 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        "B", "A", "MODE", "START", "UP", "DOWN", "LEFT", "RIGHT", "C", "Y",
        "X", "Z"
Code Example #4
import retro
import os
import time
from stable_baselines import PPO2
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from baselines.common.retro_wrappers import *
from stable_baselines.bench import Monitor
from stable_baselines.common import set_global_seeds
from policy import TransformerPolicy

env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
env = SonicDiscretizer(env)
env = RewardScaler(env, scale=0.01)
env = CustomRewardAndDoneEnv(env)
env = StochasticFrameSkip(env, n=4, stickprob=0.25)
env = Downsample(env, 2)
env = Rgb2gray(env)
env = FrameStack(env, 4)
env = ScaledFloatFrame(env)
env = TimeLimit(env, max_episode_steps=4500)
env = Monitor(env, log_dir, allow_early_resets=True)
print('Action space: ', env.action_space)
print('Observation space: ', env.observation_space)

env.seed(1234)
set_global_seeds(1234)

env = DummyVecEnv([lambda: env])

model = PPO2(policy=CnnPolicy,
Code Example #5
import tensorflow as tf
import numpy as np
import retro
from skimage import transform
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import warnings
import collections
from tensorflow import keras

env = retro.make('SpaceInvaders-Atari2600')
# state_size = env.observation_space
action_size = env.action_space.n
action_oh = np.array(np.identity(action_size, dtype=np.int).tolist())


def preprocess_frame(frame):
    gray = rgb2gray(frame)
    cropped_frame = gray[8:-12, 4:-12]
    # Normalise pixel values
    normalised_frame = cropped_frame / 255.
    # Resize to 110x84 for the network input
    preprocessed_frame = transform.resize(normalised_frame, [110, 84])
    return preprocessed_frame


stack_size = 4
stacked_frames = collections.deque(
    [np.zeros([110, 84], dtype=np.int) for i in range(stack_size)],
    maxlen=stack_size)
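A hedged sketch (not from the original source) of the helper that typically accompanies this deque: it appends each preprocessed frame and stacks the last four along the channel axis to form the (110, 84, 4) network input.

def stack_frames(stacked_frames, raw_frame, is_new_episode):
    frame = preprocess_frame(raw_frame)
    if is_new_episode:
        # At the start of an episode, fill the deque with copies of the first frame
        for _ in range(stack_size):
            stacked_frames.append(frame)
    else:
        stacked_frames.append(frame)
    # Stack along the last axis to build the state fed to the network
    stacked_state = np.stack(stacked_frames, axis=2)
    return stacked_state, stacked_frames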
Code Example #6
def main():

    env = retro.make(game=params['ENVIRONMENT'],
                     use_restricted_actions=retro.Actions.DISCRETE)

    action_space = env.action_space.n if params[
        'USE_FULL_ACTION_SPACE'] else params['SMALL_ACTION_SPACE']
    env.action_space = spaces.Discrete(action_space)

    use_time_cutoff = params['USE_TIME_CUTOFF']

    img_width = params['IMG_WIDTH']
    img_height = params['IMG_HEIGHT']
    channels = 1 if params['GRAYSCALE'] else 3

    model = GalagaAgent(action_space, img_width, img_height, channels)

    weight_files = [
        "m_weights_SQU.h5", "m_weights_SQP.h5", "m_weights_DQU.h5",
        "m_weights_DQP.h5"
    ]
    labels = [
        "Single-Q Uniform", "Single-Q Prioritized", "Double-Q Uniform",
        "Double-Q Prioritized"
    ]

    target_update_every = params['TARGET_UPDATE_EVERY']

    replay_memory_size = params['REPLAY_MEMORY_SIZE']
    score_window = deque(maxlen=replay_memory_size)

    epochs = 1
    epoch_length = 10000
    frame_count = 0
    for i in range(4):
        model.load_weights(weight_files[i])
        print(labels[i])

        for epoch in range(epochs):
            state = env.reset()
            done = False
            last_score = 0
            time = 0
            reward_window = deque(maxlen=epoch_length)

            while not done:

                state = preprocess(state, img_width, img_height, channels)

                action = model.get_action(state)

                next_state, reward, done, info = env.step(action[0])

                state = next_state

                reward_window.append(reward)

                if time > epoch_length:
                    break

                time += 1
                env.render()

            score_window.append(info['score'])

            print("\rEpisode: %d/%d, Score: %d" %
                  (epoch + 1, epochs, info['score']))
Code Example #7
import retro  # Retro Environment

from skimage import transform  # Help us to preprocess the frames
from skimage.color import rgb2gray  # Help us to gray our frames
from scipy.special import softmax

import matplotlib.pyplot as plt  # Display graphs

from collections import deque  # Ordered collection with ends

import random

from src.rl.space_invaders.space_inv_src.NNet import *
from src.rl.space_invaders.space_inv_src.utils import *

env = retro.make(game="SpaceInvaders-Atari2600")

print("The size of our frame is: ", env.observation_space)
print("The action size is : ", env.action_space.n)

# Here we create a one-hot encoded version of our actions
# possible_actions = [[1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0]...]
possible_actions = np.array(
    np.identity(env.action_space.n, dtype=int).tolist())
normalized_frame_size = (110, 84)

stack_size = 4

stacked_frames = deque(
    [np.zeros(normalized_frame_size, dtype=np.int) for i in range(stack_size)],
    maxlen=4)
Code Example #8
cbs = env_callbacks(summary_writer, args.environment, interval)
#end logging stuff

from aegis_core.flask_controller import FlaskController
from aegis_core.env_engine import EnvEngine

log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

matplotlib.use("Agg")  #threading issue

print("Creating {}...".format(args.environment))
obs_type = retro.Observations[args.obs_type.upper()]
action_type = retro.Actions[args.action_type.upper()]
env = retro.make(args.environment,
                 obs_type=obs_type,
                 use_restricted_actions=action_type)
end_reward = args.end_reward

print("Observation space:", env.observation_space)
print("Action space:", env.action_space)
#print(env.observation_space.low, env.observation_space.high)

#both RAM and image observations are in the 0-255 range
obs_scale = lambda x: x / 255.

engine = EnvEngine(env,
                   end_reward,
                   action_url=args.url,
                   run_name=args.name,
                   reward_proxy=args.proxy,
Code Example #9
@author: Parth
"""

import retro  #import gym-retro

#import policy, model and environment vectoriser from stable baselines
from stable_baselines.common.policies import CnnPolicy
from stable_baselines import A2C
from stable_baselines.common.vec_env import DummyVecEnv

#define some hyper parameters
params = {"learning_rate": 1e-3, "gamma": 0.99}

#initialise the environment
env = retro.make('Airstriker-Genesis')

#create the agent
agent = A2C(CnnPolicy, DummyVecEnv([lambda: env]), **params)

#reset the environment
obs, state, dones = env.reset(), None, [False]

#train the agent for x amount of timesteps
agent.learn(total_timesteps=1000)

#once the agent is trained, run a while loop
while True:
    actions, state = agent.predict(obs, state=state, mask=dones)
    obs, rew, done, info = env.step(actions)
    env.render()
Code Example #10
File: runner.py Project: msaroufim/RL-CLI
parser.add_argument(
    '--verbose',
    '-v',
    action='count',
    default=1,
    help='increase verbosity (can be specified multiple times)')
parser.add_argument(
    '--quiet',
    '-q',
    action='count',
    default=0,
    help='decrease verbosity (can be specified multiple times)')
args = parser.parse_args()

env = retro.make(args.game,
                 args.state or retro.STATE_DEFAULT,
                 scenario=args.scenario,
                 record=args.record)
verbosity = args.verbose - args.quiet

# if training proceeds slowly then try varying parameters here
act = deepq.learn(env,
                  q_func=model,
                  lr=1e-3,
                  max_timesteps=100000,
                  buffer_size=50000,
                  exploration_fraction=0.1,
                  exploration_final_eps=0.02,
                  print_freq=10,
                  callback=callback)
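If the trained policy needs to be reused, the act object returned by deepq.learn can be saved; a brief sketch assuming the older baselines ActWrapper API (the file name is illustrative):

# Persist the trained policy; it can later be restored with deepq.load(...)
act.save("retro_deepq_model.pkl")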

Code Example #11
File: emulator.py Project: fdelencl/AI-sandbox
    def __init__(self,
                 game='Gradius-Nes',
                 state='Level1',
                 scenario=None,
                 info=None,
                 render=True,
                 fps=50,
                 recorder=None,
                 reader=None):

        self.action_spectrum = [
            [0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 1, 0, 0, 0, 0],  # up
            [0, 0, 0, 0, 1, 0, 1, 0, 0],  # up left
            [0, 0, 0, 0, 1, 0, 0, 1, 0],  # up right
            [0, 0, 0, 0, 0, 1, 0, 0, 0],  # down
            [0, 0, 0, 0, 0, 1, 1, 0, 0],  # down left
            [0, 0, 0, 0, 0, 1, 0, 1, 0],  # down right
            [0, 0, 0, 0, 0, 0, 1, 0, 0],  # left
            [0, 0, 0, 0, 0, 0, 0, 1, 0],  # right
            [0, 0, 0, 0, 0, 0, 0, 0, 1],  # shoot
            [0, 0, 0, 0, 1, 0, 0, 0, 1],  # shoot up
            [0, 0, 0, 0, 1, 0, 1, 0, 1],  # shoot up left
            [0, 0, 0, 0, 1, 0, 0, 1, 1],  # shoot up right
            [0, 0, 0, 0, 0, 1, 0, 0, 1],  # shoot down
            [0, 0, 0, 0, 0, 1, 1, 0, 1],  # shoot down left
            [0, 0, 0, 0, 0, 1, 0, 1, 1],  # shoot down right
            [0, 0, 0, 0, 0, 0, 1, 0, 1],  # shoot left
            [0, 0, 0, 0, 0, 0, 0, 1, 1],  # shoot right
            [1, 0, 0, 0, 0, 0, 0, 0, 0],  # opt
            [1, 0, 0, 0, 1, 0, 0, 0, 0],  # opt up
            [1, 0, 0, 0, 1, 0, 1, 0, 0],  # opt up left
            [1, 0, 0, 0, 1, 0, 0, 1, 0],  # opt up right
            [1, 0, 0, 0, 0, 1, 0, 0, 0],  # opt down
            [1, 0, 0, 0, 0, 1, 1, 0, 0],  # opt down left
            [1, 0, 0, 0, 0, 1, 0, 1, 0],  # opt down right
            [1, 0, 0, 0, 0, 0, 1, 0, 0],  # opt left
            [1, 0, 0, 0, 0, 0, 0, 1, 0],  # opt right
            [1, 0, 0, 0, 0, 0, 0, 0, 1],  # opt shoot
            [1, 0, 0, 0, 1, 0, 0, 0, 1],  # opt shoot up
            [1, 0, 0, 0, 1, 0, 1, 0, 1],  # opt shoot up left
            [1, 0, 0, 0, 1, 0, 0, 1, 1],  # opt shoot up right
            [1, 0, 0, 0, 0, 1, 0, 0, 1],  # opt shoot down
            [1, 0, 0, 0, 0, 1, 1, 0, 1],  # opt shoot down left
            [1, 0, 0, 0, 0, 1, 0, 1, 1],  # opt shoot down right
            [1, 0, 0, 0, 0, 0, 1, 0, 1],  # opt shoot left
            [1, 0, 0, 0, 0, 0, 0, 1, 1],  # opt shoot right
        ]

        self.render = render
        self.fps = fps
        self.recorder = recorder
        self.reader = reader
        self.env = retro.make(game=game, state=state, info=info)
        self.env.reset()
        self.user_actions = [0, 0, 0, 0, 0, 0, 0, 0, 0]
        self.end_game = False
        if (render):
            self.env.render()
            self.env.unwrapped.viewer.window.on_key_press = self.on_key_press
            self.env.unwrapped.viewer.window.on_key_release = self.on_key_release
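A hypothetical illustration (not part of the original file) of how one entry of the table above could be fed to the wrapped environment from another method of this class; index 9 is the plain "shoot" combination:

# Step the emulator with a single 9-button combination from action_spectrum
action = self.action_spectrum[9]
observation, reward, done, info = self.env.step(action)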
Code Example #12
import retro
import time
import os
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
from stable_baselines.common import set_global_seeds
from stable_baselines.bench import Monitor
from baselines.common.retro_wrappers import *
from util import log_dir, callback, AirstrikerDiscretizer, CustomRewardAndDoneEnv

# Create the environment (1)
env = retro.make(game='Airstriker-Genesis', state='Level1')
env = AirstrikerDiscretizer(env)  # convert the action space to a discrete space
env = CustomRewardAndDoneEnv(env)  # custom reward and episode-done conditions
env = StochasticFrameSkip(env, n=4, stickprob=0.25)  # sticky frame skip
env = Downsample(env, 2)  # downsampling
env = Rgb2gray(env)  # grayscale
env = FrameStack(env, 4)  # frame stack
env = ScaledFloatFrame(env)  # normalize observations
env = Monitor(env, log_dir, allow_early_resets=True)
print('Action space: ', env.action_space)
print('Observation space: ', env.observation_space)

# Set the seed
env.seed(0)
set_global_seeds(0)

# Create the vectorized environment
env = DummyVecEnv([lambda: env])

# Create the model
Code Example #13
    '--obs-type',
    '-o',
    default='image',
    choices=['image', 'ram'],
    help='the observation type, either `image` (default) or `ram`')
parser.add_argument('--players',
                    '-p',
                    type=int,
                    default=1,
                    help='number of players/agents (default: 1)')
args = parser.parse_args()

obs_type = retro.Observations.IMAGE if args.obs_type == 'image' else retro.Observations.RAM
env = retro.make(args.game,
                 args.state or retro.State.DEFAULT,
                 scenario=args.scenario,
                 record=args.record,
                 players=args.players,
                 obs_type=obs_type)
verbosity = args.verbose - args.quiet
try:
    while True:
        ob = env.reset()
        t = 0
        totrew = [0] * args.players
        while True:
            ac = env.action_space.sample()
            ob, rew, done, info = env.step(ac)
            t += 1
            if t % 10 == 0:
                if verbosity > 1:
                    infostr = ''
Code Example #14
  Rendering a bk2 file to mp4 requires ffmpeg: python -m retro.scripts.playback_movie winner.bk2

  This playback demonstrates that if you stopped the simulation manually on some condition
  other than the "done" flag returned from env.step, you will need to apply the same condition in the
  neural network playback; otherwise the simulation will continue and the network will keep taking actions.
"""

import retro  # pip install gym-retro
import numpy as np  # pip install numpy
import cv2  # pip install opencv-python
import neat  # pip install neat-python
import pickle  # standard library, no install needed

# create retro environment: game, state, scenario (defines rewards)
environment = retro.make('SonicTheHedgehog-Genesis',
                         'GreenHillZone.Act2',
                         scenario='contest',
                         record='.')

# reset environment to initial state
observation = environment.reset()

# configuration for playback from pkl must be the same as execution
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'config-neat')

# NEAT setup
population = neat.Population(config)
population.add_reporter(neat.StdOutReporter(True))
stats = neat.StatisticsReporter()
population.add_reporter(stats)
Code Example #15
import retro
import neat
import cv2
import os
import time
import numpy as np
import pickle
from statistics import mean
from handlers import *

env = retro.make("SonicTheHedgehog3-Genesis")
info(env)


def eval_genomes(genomes, config):
    times_level_finished = 0
    member = 0
    generation_fitness_log = []

    for genome_id, genome in genomes:
        state = env.reset()

        net = neat.nn.recurrent.RecurrentNetwork.create(genome, config)

        temp_state, _, _, _ = env.step(env.action_space.sample())
        WIDTH  = int(temp_state.shape[0]/8)
        HEIGHT = int(temp_state.shape[1]/8)

        done = False
        current_fitness = 0
        generation_max_fitness = 0
Code Example #16
File: brute.py Project: zhang921210/retro
                              self._max_episode_steps)
        steps, total_rew = rollout(self._env, acts)
        executed_acts = acts[:steps]
        self.node_count += update_tree(self._root, executed_acts, total_rew)
        return executed_acts, total_rew


def brute_retro(
    game,
    max_episode_steps=4500,
    timestep_limit=100_000_000,
    state=retro.State.DEFAULT,
    scenario=None,
):
    env = retro.make(game,
                     state,
                     use_restricted_actions=retro.Actions.DISCRETE,
                     scenario=scenario)
    env = Frameskip(env)
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    brute = Brute(env, max_episode_steps=max_episode_steps)
    timesteps = 0
    best_rew = float('-inf')
    while True:
        acts, rew = brute.run()
        timesteps += len(acts)

        if rew > best_rew:
            print(f"new best reward {best_rew} => {rew}")
            best_rew = rew
            env.unwrapped.record_movie("best.bk2")
Code Example #17
File: doom.py Project: chriscremer/Other_Code
env = gym.make('CarRacing-v0')
env.reset()
for _ in range(100): # run for 100 steps
    # env.render()
    action = env.action_space.sample() # pick a random action
    env.step(action) # take action

    print (action)

import retro
env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')



import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
Code Example #18
File: total_random.py Project: PeerM/starman
        self.action_space = Discrete(2)

    def action(self, action):
        new_action = [0] * 9
        new_action[self.index_right] = 1
        new_action[self.index_b] = 1
        new_action[self.index_a] = action
        return new_action


if __name__ == '__main__':
    monitor = "results/unbiased_random/1"
    action_repeat = True
    single_life = True
    render = None
    env = retro.make("SuperMarioBros-Nes")
    env = MarioDiscretizer(env)
    if single_life:
        env = SingleLifeEnv(env)
    if monitor is not None:
        env = Monitor(env, monitor, video_callable=lambda i: False)
    if render is not None:
        env = AutoRenderer(env, auto_render_period=render)
    if action_repeat:
        env = FrameStack(env, 4)
    # model = WeightedRandomAgent()
    model = RandomAgent(lambda: env.action_space.sample())
    player = BasicRoller(env, model, min_episodes=1)
    # total_rollouts = [player.rollouts() for rollout_i in trange(40)]
    # flat_rollouts = reduce(list.__add__, total_rollouts)
    # total_rewards = map(lambda r: r.total_reward, flat_rollouts)
Code Example #19
File: ai_plays.py Project: 4thSword/Ghost-AI-Goblins
        checkpoints = [f for f in glob.glob('neat-checkpoint-*')]
        checkpoints = [int(f[16:]) for f in checkpoints]
        checkpoints.sort()
        return neat.Checkpointer.restore_checkpoint(
            'neat-checkpoint-{}'.format(checkpoints[-1]))
    except:
        print(
            'No checkpoints in our folder, starting training from generation 0'
        )
        return neat.Population(config)


if __name__ == "__main__":
    # Creates our Ghosts'n Goblins environment:
    #env = retro.make('GhostsnGoblins-Nes','Level1')
    env = retro.make(game='GhostsnGoblins-Nes', record='../records')
    # Loads our selected configuration for our Neat neural network:
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         'config-feedforward')
    '''
    with open('winner.pkl', 'rb') as input_file:
        genome = pickle.load(input_file)
    '''

    # Restore the last checkpoint if it exists, otherwise start from generation zero:
    p = load_last_checkpoint()

    # Uncomment to restore a specific checkpoint if you don't want to restore the last one
    #p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-803')
Code Example #20
import retro

from stable_baselines.common.policies import CnnLstmPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines import PPO2

n_cpu = 12
env = SubprocVecEnv(
    [lambda: retro.make('Airstriker-Genesis') for i in range(n_cpu)])

model = PPO2.load('ppo2_airstriker', env)

obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
Code Example #21
    def __init__(self, config: Optional[Config] = None):
        super().__init__()
        global args
        self.config = config
        self.top = 150
        self.left = 150
        self.width = 1100
        self.height = 700

        self.title = 'Super Mario Bros AI'
        self.current_generation = 0
        # This is the generation that is actually 0. If you load individuals then you might end up starting at gen 12,
        # in which case gen 12 would be the true 0
        self._true_zero_gen = 0

        self._should_display = True
        self._timer = QTimer(self)
        self._timer.timeout.connect(self._update)
        # Keys correspond with B, NULL, SELECT, START, U, D, L, R, A
        # index                0  1     2       3      4  5  6  7  8
        self.keys = np.array( [0, 0,    0,      0,     0, 0, 0, 0, 0], np.int8)

        # I only allow U, D, L, R, A, B and those are the indices in which the output will be generated
        # We need a mapping from the output to the keys above
        self.ouput_to_keys_map = {
            0: 4,  # U
            1: 5,  # D
            2: 6,  # L
            3: 7,  # R
            4: 8,  # A
            5: 0   # B
        }

        # Initialize the starting population
        individuals: List[Individual] = []

        # Load any individuals listed in the args.load_inds
        num_loaded = 0
        if args.load_inds:
            # Overwrite the config file IF one is not specified
            if not self.config:
                try:
                    self.config = Config(os.path.join(args.load_file, 'settings.config'))
                except:
                    raise Exception(f'settings.config not found under {args.load_file}')

            set_of_inds = set(args.load_inds)

            for ind_name in os.listdir(args.load_file):
                if ind_name.startswith('best_ind_gen'):
                    ind_number = int(ind_name[len('best_ind_gen'):])
                    if ind_number in set_of_inds:
                        individual = load_mario(args.load_file, ind_name, self.config)
                        # Set debug stuff if needed
                        if args.debug:
                            individual.name = f'm{num_loaded}_loaded'
                            individual.debug = True
                        individuals.append(individual)
                        num_loaded += 1
            
            # Set the generation
            self.current_generation = max(set_of_inds) + 1  # +1 because it's the next generation
            self._true_zero_gen = self.current_generation

        # Load any individuals listed in args.replay_inds
        if args.replay_inds:
            # Overwrite the config file IF one is not specified
            if not self.config:
                try:
                    self.config = Config(os.path.join(args.replay_file, 'settings.config'))
                except:
                    raise Exception(f'settings.config not found under {args.replay_file}')

            for ind_gen in args.replay_inds:
                ind_name = f'best_ind_gen{ind_gen}'
                fname = os.path.join(args.replay_file, ind_name)
                if os.path.exists(fname):
                    individual = load_mario(args.replay_file, ind_name, self.config)
                    # Set debug stuff if needed
                    if args.debug:
                        individual.name= f'm_gen{ind_gen}_replay'
                        individual.debug = True
                    individuals.append(individual)
                else:
                    raise Exception(f'No individual named {ind_name} under {args.replay_file}')
        # If it's not a replay then we need to continue creating individuals
        else:
            num_parents = max(self.config.Selection.num_parents - num_loaded, 0)
            for _ in range(num_parents):
                individual = Mario(self.config)
                # Set debug stuff if needed
                if args.debug:
                    individual.name = f'm{num_loaded}'
                    individual.debug = True
                individuals.append(individual)
                num_loaded += 1

        self.best_fitness = 0.0
        self._current_individual = 0
        self.population = Population(individuals)

        self.mario = self.population.individuals[self._current_individual]
        
        self.max_distance = 0  # Track farthest traveled in level
        self.max_fitness = 0.0
        self.env = retro.make(game='SuperMarioBros-Nes', state=f'Level{self.config.Misc.level}')

        # Determine the size of the next generation based off selection type
        self._next_gen_size = None
        if self.config.Selection.selection_type == 'plus':
            self._next_gen_size = self.config.Selection.num_parents + self.config.Selection.num_offspring
        elif self.config.Selection.selection_type == 'comma':
            self._next_gen_size = self.config.Selection.num_offspring

        # If we aren't displaying we need to reset the environment to begin with
        if args.no_display:
            self.env.reset()
        else:
            self.init_window()

            # Set the generation in the label if needed
            if args.load_inds:
                txt = "<font color='red'>" + str(self.current_generation + 1) + '</font>'  # +1 because we switch from 0 to 1 index
                self.info_window.generation.setText(txt)

            # if this is a replay then just set current_individual to be 'replay' and set generation
            if args.replay_file:
                self.info_window.current_individual.setText('Replay')
                txt = f"<font color='red'>{args.replay_inds[self._current_individual] + 1}</font>"
                self.info_window.generation.setText(txt)

            self.show()

        if args.no_display:
            self._timer.start(1000 // 1000)
        else:
            self._timer.start(1000 // 60)
Code Example #22
## Run the selected game and state from here

import gym
import retro
from stable_baselines.common.policies import MlpPolicy, CnnPolicy
from ppo2template import PPO2
from brute import TimeLimit
from discretizer import SF2Discretizer

env = retro.make('StreetFighterIISpecialChampionEdition-Genesis',
                 'Champion.Level1.RyuVsGuile.state',
                 obs_type=retro.Observations.IMAGE
                 )  #change to compare IMAGE to RAM observations
env = SF2Discretizer(env)
env = TimeLimit(env, max_episode_steps=2500)

model = PPO2(MlpPolicy, env, n_steps=2500, verbose=2).learn(
    75000)  #put the selected policy and episode steps in here.
model.save("ppo2_esf")

#del model # remove to demonstrate saving and loading

#model = PPO2.load("ppo2_esf") # load a saved file

# Enjoy trained agent
obs = env.reset()
timesteps = 0
totalrewards = 0.0
#env.unwrapped.record_movie("PPOII.bk2") #to start saving the recording
while True:
    action, _states = model.predict(obs)
Code Example #23
File: playback.py Project: OBINJAWED/Snes-Neat
import retro
import numpy as np
import cv2
import neat
import pickle

env = retro.make('SuperMarioWorld-Snes', 'YoshiIsland2.state', record=True)

imgarray = []

xpos_end = 0

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'config-feedforward')

p = neat.Population(config)

p.add_reporter(neat.StdOutReporter(True))
stats = neat.StatisticsReporter()
p.add_reporter(stats)

with open('winner.pkl', 'rb') as input_file:
    genome = pickle.load(input_file)

ob = env.reset()
ac = env.action_space.sample()

inx, iny, inc = env.observation_space.shape

inx = int(inx / 8)
Code Example #24
import retro
import numpy as np
import cv2
import neat
import pickle

# create game environment
env = retro.make('SuperMarioWorld-Snes', 'DonutPlains1')

# create a window to show the gameplay
cv2.namedWindow('SuperMarioWorld-Snes | NEAT-Python | jubatistim',
                cv2.WINDOW_NORMAL)
cv2.moveWindow("SuperMarioWorld-Snes | NEAT-Python | jubatistim", 950, 120)
cv2.resizeWindow('SuperMarioWorld-Snes | NEAT-Python | jubatistim', 800, 600)

# generation
generation = 9


# function to evaluate genomes during the training process
def eval_genomes(genomes, config):

    # generation
    global generation
    generation += 1

    for genome_id, genome in genomes:

        #######################################################################################################
        ###########################################control variables###########################################
        #######################################################################################################
Code Example #25
File: one_player.py Project: mikipacman/retro-rl
opponents = ["Raiden", "Jax", "SubZero", "Scorpion", "Baraka"]
left_players = available_opponents
versions = range(32)

counter = 0
num_to_generate = len(difficulties) * len(arenas) * len(opponents) * len(
    versions) * len(left_players)
start = time.time()

for difficulty in difficulties:
    for arena in arenas:
        for opp in opponents:
            for p1 in left_players:
                for version in versions:
                    env = retro.make(
                        game_name,
                        players=2,
                        state=f"{difficulty}_{arena}_{opp}_{version}")
                    env.reset()

                    # Get paths for cursors
                    p1_cords = np.where(fighters_matrix == p1)
                    p1_actions = ["RIGHT", "LEFT"]
                    for _ in range(p1_cords[0][0]):
                        p1_actions.append("DOWN")

                    for _ in range(p1_cords[1][0]):
                        p1_actions.append("RIGHT")

                    p1_actions.append("A")

                    # Execute and save
Code Example #26
import retro

movie = retro.Movie('SonicTheHedgehog-Genesis-GreenHillZone.Act1-000000.bk2')
movie.step()

env = retro.make(
    game=movie.get_game(),
    state=None,
    # bk2s can contain any button presses, so allow everything
    use_restricted_actions=retro.Actions.ALL,
    players=movie.players,
)
env.initial_state = movie.get_state()
env.reset()

while movie.step():
    env.render()
    keys = []
    for p in range(movie.players):
        for i in range(env.num_buttons):
            keys.append(movie.get_key(i, p))
    env.step(keys)
Code Example #27
import retro
import numpy as np
import cv2
import neat
import pickle
#import os

env = retro.make(game='Airstriker-Genesis')  # load the game


def get_image(inx, iny, ob):
    env.render()
    ob = cv2.resize(ob, (inx, iny))
    ob = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY)
    return ob


def img_flatten(ob):
    imgarray = np.ndarray.flatten(ob)
    return imgarray


def get_action(nnOutput):
    action = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    action[0] = nnOutput[0]
    action[6] = nnOutput[1]
    action[7] = nnOutput[2]
    return action


ob = env.reset()
Code Example #28
import retro
import numpy as np
import cv2
import neat
import pickle

# create game environment
env = retro.make('DonkeyKongCountry-Snes',
                 '1Player.CongoJungle.JungleHijinks.Level1')

# create a window to show the gameplay
cv2.namedWindow('DonkeyKongCountry-Snes | NEAT-Python | jubatistim',
                cv2.WINDOW_NORMAL)
cv2.moveWindow("DonkeyKongCountry-Snes | NEAT-Python | jubatistim", 950, 120)
cv2.resizeWindow('DonkeyKongCountry-Snes | NEAT-Python | jubatistim', 800, 600)

# generation
generation = -1


# function to evaluate genomes during the training process
def eval_genomes(genomes, config):

    # generation
    global generation
    generation += 1

    for genome_id, genome in genomes:

        #######################################################################################################
        ###########################################control variables###########################################
Code Example #29
'''
==================================================================================
Name        : Altered Beast AI
Author      : Kenshiro
Version     : 1.00
Copyright   : GNU General Public License (GPLv3)
Description : Altered Beast AI based on the T-Rex evolutionary neural network
==================================================================================
'''

#Action space: MultiBinary(12)
#Observation space: Box(224, 320, 3)
#Observation high: 255, low: 0

import retro
env = retro.make(game='AlteredBeast-Genesis')

import math
import array
import sys
import time
import cv2

from ctypes import *
so_file = "/usr/local/lib/libT-Rex.so"
tRex = CDLL(so_file)

#OBSERVATION DATA
SCREEN_ROWS = 224
SCREEN_COLUMNS = 320
SKIPPED_PIXELS = 2
Code Example #30
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : mario.py
# @Author: [email protected]
# @Date  : 2019-03-21
# @Desc  :
import retro
import random

env = retro.make(game='SuperMarioBros-Nes')
actions = {
    #   0  1  2  3  4  5  6  7  8
    #   b  n        u  d  l  r  a
    0: [0, 1, 0, 0, 0, 0, 0, 0, 0],  # noop
    1: [0, 0, 0, 0, 0, 0, 1, 0, 0],  # left
    2: [0, 0, 0, 0, 0, 0, 0, 1, 0],  # right
    3: [0, 0, 0, 0, 0, 0, 0, 0, 1],  # a
    4: [0, 0, 0, 0, 0, 0, 0, 1, 1],  # right + a
    5: [1, 0, 0, 0, 0, 0, 0, 1, 0],  # right + b
    6: [1, 0, 0, 0, 0, 0, 0, 1, 1],  # right + a +b
}


def sample():
    return random.randint(0, 6)


if __name__ == '__main__':
    obs = env.reset()
    for i in range(100000):
        act_index = sample()
Code Example #31
# In[2]:

#pip install -r requirements.txt

# In[3]:

import retro
import numpy as np
import cv2
import neat
import pickle

# In[4]:

env = retro.make(game='Airstriker-Genesis')
obs = env.reset()
done = False
imgarray = []

# In[17]:


def eval_genomes(genomes, config):

    for genome_id, genome in genomes:
        obs = env.reset()
        inx, iny, inc = env.observation_space.shape

        inx = int(inx / 8)
        iny = int(iny / 8)
Code Example #32
def eval_genomes(genomes: list, config: neat.config.Config) -> None:
    """O objetivo dessa função é selecionar os melhores indivíduos em 
    uma população atribuindo a cada um deles um Fitness para indicar o
    quão bom são.

    Args:
        genomes (list): Uma lista contendo todos os indivíduos da população (que
        são da classe 'neat.genome.DefaultGenome').
        config (neat.config.Config): Arquivo de texto contendo as configurações
        para a biblioteca NEAT-Python (obrigatório) como o número de inputs, outputs,
        funções de ativação, tamanho da população e etc.
    """
    global is_new_better  # Usado para verificar se foi encontrado um novo melhor indivíduo durante o treinamento

    env = retro.make(game='SuperMarioWorld-Snes',
                     state='YoshiIsland2',
                     players=1)

    print('\n################### INICIO ###################\n\n')
    # Fazendo uma iteração sobre todos os indivíduos da população para atribuir a cada um o Fitness correspondente
    for genome_id, genome in genomes:
        env.reset()
        net = neat.nn.FeedForwardNetwork.create(
            genome, config)  # Criando a Rede Neural

        penalty = 0  # Penalidade dada ao Mário quando ele "enfrenta" cara a cara um inimigo
        frame = 0  # Utilizado para calcular quantos frames já passaram e verificar se o Mário ficou preso em um "Loop"
        count = 0  # Utilizado para verificar se o Mário travou no eixo x (dx = 0 -> c += 1)
        t = 0  # Usado para medirmos o tempo gasto em cada fase
        done = False  # Usado para indicar quando a validação terminará
        print(f'# Individuo ID {genome_id} | População ID {p.generation} #')
        while not done:
            inputs, mario_x, mario_y = ri.getInputs(ri.getRam(
                env))  # Pegando o vetor de entrada e a posição em x do Mário
            output = net.activate(
                inputs)  # Pegando a saída da RNA ao utilizarmos o input acima
            action = [0 if bit < 0.5 else 1 for bit in output
                      ]  # Transformando em binário para simbolizar os botões
            env.step(action)  # Mudamos o ambiente ao executar a ação
            if display:
                env.render(
                )  # Usado para selecionar se é desejado ver ou não a tela do Mário
            frame += 1  # Um frame a mais!
            # Pega o novo vetor de entrada e a nova posição em x do Mário (usados para verificar se o agente ficou preso)
            new_inputs, new_mario_x, new_mario_y = ri.getInputs(ri.getRam(env))

            # Recebendo a RAM do jogo:
            ram = env.get_ram()

            # Indica que o Mário morreu
            if ram[0x0071] == 9: done = True

            # Verificar se o Mário travou
            if new_mario_x == mario_x: count += 1
            else: count = 0
            if count > 100:
                done = True  # Se ficou parado em x quando count bate 100, encerramos a validação

            # Sinaliza que o Mário ficou preso em um loop (e.g., indo repetidamente da esquerda para direita)
            marks = [v for v in range(500, 5500, 500)
                     ]  # Marcos úteis para verificar
            for mark in marks:
                if new_mario_x < mark < frame: done = True

            # Posições 97 e 98 são logo a frente do Mário; Usaremos para desencorajar Mário a "enfrentar" os inimigos "cara a cara"
            if -1 in (new_inputs[97], new_inputs[98]): penalty += 5

            # Indica que o mário acabou a fase
            if ram[0x13D9] == 2:
                # Usaremos isso para inserir o fator tempo entre os melhores que terminaram a fase (decidir pelo mais rapido)
                fitness = new_mario_x + 1000 * (390 - t) / 390
                done = True
            else:
                # Fitness é a posição x que o Mário conseguiu alcançar descontado de uma penalização
                fitness = new_mario_x - penalty

            # Mário abriu uma caixa de mensagem (Yoshi ou Bloco de informação)
            while ram[0x1426] != 0:
                env.step(ut.dec2bin(
                    1))  # Botão que vai fazer ele fechar o balão de fala
                if display:
                    env.render(
                    )  # Usado para selecionar se é desejado ver ou não a tela do Mário
                env.step(
                    ut.dec2bin(0)
                )  # Não faz nada (Como se ele estivesse apertando e soltando o botão)
                if display:
                    env.render(
                    )  # Usado para selecionar se é desejado ver ou não a tela do Mário
                new_inputs, new_mario_x, new_mario_y = ri.getInputs(
                    ri.getRam(env))
                ram = env.get_ram()

            # Pegando o tempo in game
            if ram[0x0F30] == 0: t += 1

        # Atualiza o fitness do indivíduo
        genome.fitness = float(fitness)

        # Selecionamos o arquivo com base no argv
        if len(sys.argv) == 1:
            bgf = best_genome_file
        elif len(sys.argv) == 2 and sys.argv[1] == 'new':
            bgf = ng_best_genome_file

        # Se for o melhor, salvamos ele
        if os.path.exists(bgf):
            with open(bgf, 'rb') as file:
                best_genome = pickle.load(file)
            if genome.fitness > best_genome.fitness:
                best_genome = genome
                is_new_better = True
                with open(bgf, 'wb') as file:
                    pickle.dump(genome, file)

        else:  # Se não existir o arquivo de melhor genome, criamos e salvamos o genome atual
            best_genome = genome
            is_new_better = True
            with open(bgf, 'wb') as file:
                pickle.dump(genome, file)

        # Imprime as características desse indivíduo
        print(f'==================== INFO ====================\n'
              f'-> Geração: {p.generation}\n'
              f'-> ID do Indivíduo: {genome_id}\n'
              f'-> Posição final: ({new_mario_x}, {new_mario_y})\n'
              f'-> Penalidade total: {penalty:.2f}\n'
              f'-> Fitness: {genome.fitness:.2f}\n'
              f'-> Tempo gasto (no jogo): {t}s\n'
              f'-> É o melhor: {genome.fitness == best_genome.fitness}\n'
              f'==============================================\n\n')

    print('\n##################### FIM ####################\n\n')

    env.render(close=True)
    del env

    return None
Code Example #33
def eval_genome(genome, config):
    environment = retro.make('SuperMarioBros-Nes',
                             'Level1-1',
                             scenario='scenario')

    # reset environment to initial state
    observation = environment.reset()

    # shape/resolution of image created by emulator
    inx, iny, inc = environment.observation_space.shape

    # scale down observation
    inx = int(inx / 8)
    iny = int(iny / 8)

    # create NEAT network alt
    # network = neat.nn.FeedForwardNetwork.create(genome, config)

    # create NEAT network
    network = neat.nn.recurrent.RecurrentNetwork.create(genome, config)

    # set up some variables to track fitness
    current_max_fitness = 0
    fitness = 0
    counter = 0

    finished = False
    while not finished:
        # render the game (be careful!)
        # env.render()

        # resize and reshape the observation image
        observation = cv2.resize(observation, (inx, iny))
        observation = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        observation = np.reshape(observation, (inx, iny))

        # create a single array from 2d pixel data
        img_array = np.ndarray.flatten(observation)

        # create actions from input
        actions = network.activate(img_array)

        # take a peek at actions before translation
        # print(actions)

        # map relu activation output to 0 or 1
        actions = np.where(np.array(actions) <= 0.0, 0.0, 1.0).tolist()

        # take a peek at actions before translation
        # print(actions)

        # increment the emulator state
        observation, reward, done, info = environment.step(actions)

        # update fitness with reward from environment
        fitness += reward

        # allow 250 steps without fitness improvement before ending this rollout
        if fitness > current_max_fitness:
            current_max_fitness = fitness
            counter = 0
        else:
            counter += 1

        if done or counter == 250:
            finished = True

    return current_max_fitness