def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    import retro
    if state is None:
        state = retro.State.DEFAULT
    env = retro.make(game, state, **kwargs)
    env = StochasticFrameSkip(env, n=4, stickprob=0.25)
    if max_episode_steps is not None:
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
    return env
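# NOTE: `make_retro` above relies on a StochasticFrameSkip wrapper that is not
# defined in this snippet. A minimal sketch, modeled on the wrapper shipped
# with gym-retro's example code (the exact sticky-action bookkeeping used here
# is an assumption):
import gym
import numpy as np

class StochasticFrameSkip(gym.Wrapper):
    """Repeat each action for n frames; with probability stickprob, keep the
    previous action for the first repeated frame ("sticky actions")."""
    def __init__(self, env, n, stickprob):
        gym.Wrapper.__init__(self, env)
        self.n = n
        self.stickprob = stickprob
        self.curac = None
        self.rng = np.random.RandomState()

    def reset(self, **kwargs):
        self.curac = None
        return self.env.reset(**kwargs)

    def step(self, ac):
        done = False
        totrew = 0
        for i in range(self.n):
            if self.curac is None:
                # First step after reset: always use the requested action
                self.curac = ac
            elif i == 0:
                # First substep: possibly keep the old action (sticky)
                if self.rng.rand() > self.stickprob:
                    self.curac = ac
            elif i == 1:
                # Second substep: switch to the requested action for the rest
                self.curac = ac
            ob, rew, done, info = self.env.step(self.curac)
            totrew += rew
            if done:
                break
        return ob, totrew, done, info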
def make_env(num_steps, stack=True, scale_rew=True):
    """
    Create an environment with some standard wrappers.
    """
    env = retro.make(game='SonicTheHedgehog-Genesis',
                     state='GreenHillZone.Act1')
    env = gym.wrappers.TimeLimit(env, max_episode_steps=num_steps)
    env = SonicDiscretizer(env)
    env = AllowBacktracking(env)
    if scale_rew:
        env = RewardScaler(env, REWARD_RATE)
    env = WarpFrame(env)
    if stack:
        env = FrameStack(env, NUM_STATES)
    return env
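# NOTE: the Sonic-specific wrappers used by `make_env` are not defined in this
# snippet. Sketches along the lines of OpenAI's retro-contest baselines
# (sonic_util.py); the exact button combos and scaling constant used in this
# codebase may differ:
import gym
import numpy as np

class SonicDiscretizer(gym.ActionWrapper):
    """Convert the 12-button MultiBinary Genesis action space into a small
    discrete set of Sonic-relevant button combinations."""
    def __init__(self, env):
        super(SonicDiscretizer, self).__init__(env)
        buttons = ["B", "A", "MODE", "START", "UP", "DOWN",
                   "LEFT", "RIGHT", "C", "Y", "X", "Z"]
        combos = [['LEFT'], ['RIGHT'], ['LEFT', 'DOWN'], ['RIGHT', 'DOWN'],
                  ['DOWN'], ['DOWN', 'B'], ['B']]
        self._actions = []
        for combo in combos:
            arr = np.array([False] * 12)
            for button in combo:
                arr[buttons.index(button)] = True
            self._actions.append(arr)
        self.action_space = gym.spaces.Discrete(len(self._actions))

    def action(self, a):
        return self._actions[a].copy()

class RewardScaler(gym.RewardWrapper):
    """Multiply rewards by a constant; PPO-style algorithms are sensitive
    to reward scale."""
    def __init__(self, env, scale=0.01):
        super(RewardScaler, self).__init__(env)
        self.scale = scale

    def reward(self, reward):
        return reward * self.scale

class AllowBacktracking(gym.Wrapper):
    """Only reward net forward progress, so the agent is not punished for
    backtracking past its previous maximum x position."""
    def __init__(self, env):
        super(AllowBacktracking, self).__init__(env)
        self._cur_x = 0
        self._max_x = 0

    def reset(self, **kwargs):
        self._cur_x = 0
        self._max_x = 0
        return self.env.reset(**kwargs)

    def step(self, action):
        obs, rew, done, info = self.env.step(action)
        self._cur_x += rew
        rew = max(0, self._cur_x - self._max_x)
        self._max_x = max(self._max_x, self._cur_x)
        return obs, rew, done, info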
    # catching the other keys
    if keys[K_c]:
        buttons[8] = 1
    if keys[K_y]:
        buttons[9] = 1
    if keys[K_x]:
        buttons[10] = 1
    if keys[K_z]:
        buttons[11] = 1
    return buttons

pygame.init()
env = retro.make('SonicTheHedgehog-Genesis', 'GreenHillZone.Act1')
screen = pygame.display.set_mode(video_size)
env.reset()
done = False
clock = pygame.time.Clock()
unique_filename = str(datetime.datetime.now().date()) + '_' + str(
    datetime.datetime.now().time()).replace(':', '-')
# Open in 'a' (append) mode: the file is created if it does not exist,
# otherwise it is opened and appended to
with open('sonic-data-' + unique_filename + '.csv', 'a', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        "B", "A", "MODE", "START", "UP", "DOWN", "LEFT", "RIGHT", "C", "Y",
        "X", "Z"
    ])
import retro
import os
import time
from stable_baselines import PPO2
from stable_baselines.common.policies import CnnPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from baselines.common.retro_wrappers import *
from stable_baselines.bench import Monitor
from stable_baselines.common import set_global_seeds
from policy import TransformerPolicy

env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')
env = SonicDiscretizer(env)
env = RewardScaler(env, scale=0.01)
env = CustomRewardAndDoneEnv(env)
env = StochasticFrameSkip(env, n=4, stickprob=0.25)
env = Downsample(env, 2)
env = Rgb2gray(env)
env = FrameStack(env, 4)
env = ScaledFloatFrame(env)
env = TimeLimit(env, max_episode_steps=4500)
env = Monitor(env, log_dir, allow_early_resets=True)
print('Action space: ', env.action_space)
print('Observation space: ', env.observation_space)

env.seed(1234)
set_global_seeds(1234)

env = DummyVecEnv([lambda: env])

model = PPO2(policy=CnnPolicy,
import tensorflow as tf
import numpy as np
import retro
from skimage import transform
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
import warnings
import collections
from tensorflow import keras

env = retro.make('SpaceInvaders-Atari2600')

# state_size = env.observation_space
action_size = env.action_space.n
action_oh = np.array(np.identity(action_size, dtype=int).tolist())

def preprocess_frame(frame):
    gray = rgb2gray(frame)
    cropped_frame = gray[8:-12, 4:-12]
    # Normalise pixel values
    normalised_frame = cropped_frame / 255.
    # Resize to the network's input resolution
    preprocessed_frame = transform.resize(normalised_frame, [110, 84])
    return preprocessed_frame

stack_size = 4
stacked_frames = collections.deque(
    [np.zeros([110, 84], dtype=int) for i in range(stack_size)],
    maxlen=stack_size)
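# NOTE: the `stacked_frames` deque above is usually driven by a small helper
# that appends each new preprocessed frame and refills the deque at the start
# of an episode. The helper itself is not shown in this snippet; a sketch of
# the common pattern:
def stack_frames(stacked_frames, state, is_new_episode):
    """Append the preprocessed frame; on a new episode, fill the whole deque
    with copies of the first frame."""
    frame = preprocess_frame(state)
    if is_new_episode:
        for _ in range(stack_size):
            stacked_frames.append(frame)
    else:
        stacked_frames.append(frame)
    # Stack along the channel axis: shape (110, 84, 4)
    stacked_state = np.stack(stacked_frames, axis=2)
    return stacked_state, stacked_frames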
def main():
    env = retro.make(game=params['ENVIRONMENT'],
                     use_restricted_actions=retro.Actions.DISCRETE)
    action_space = env.action_space.n if params[
        'USE_FULL_ACTION_SPACE'] else params['SMALL_ACTION_SPACE']
    env.action_space = spaces.Discrete(action_space)

    use_time_cutoff = params['USE_TIME_CUTOFF']
    img_width = params['IMG_WIDTH']
    img_height = params['IMG_HEIGHT']
    channels = 1 if params['GRAYSCALE'] else 3

    model = GalagaAgent(action_space, img_width, img_height, channels)

    weight_files = [
        "m_weights_SQU.h5", "m_weights_SQP.h5", "m_weights_DQU.h5",
        "m_weights_DQP.h5"
    ]
    labels = [
        "Single-Q Uniform", "Single-Q Prioritized", "Double-Q Uniform",
        "Double-Q Prioritized"
    ]

    target_update_every = params['TARGET_UPDATE_EVERY']
    replay_memory_size = params['REPLAY_MEMORY_SIZE']
    score_window = deque(maxlen=replay_memory_size)

    epochs = 1
    epoch_length = 10000
    frame_count = 0

    for i in range(4):
        model.load_weights(weight_files[i])
        print(labels[i])
        for epoch in range(epochs):
            state = env.reset()
            done = False
            last_score = 0
            time = 0
            reward_window = deque(maxlen=epoch_length)
            while not done:
                state = preprocess(state, img_width, img_height, channels)
                action = model.get_action(state)
                next_state, reward, done, info = env.step(action[0])
                state = next_state
                reward_window.append(reward)
                if time > epoch_length:
                    break
                time += 1
                env.render()
            score_window.append(info['score'])
            print("\rEpisode: %d/%d, Score: %d" %
                  (epoch + 1, epochs, info['score']))
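# NOTE: `preprocess` is imported from elsewhere in this project. A plausible
# sketch of what it does, given how it is called above; this helper is
# hypothetical and the real implementation may differ:
import cv2
import numpy as np

def preprocess(state, img_width, img_height, channels):
    """Hypothetical: resize the raw frame, optionally grayscale it, and add
    a batch dimension so the agent can consume it directly."""
    frame = cv2.resize(state, (img_width, img_height))
    if channels == 1:
        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)[..., np.newaxis]
    frame = frame.astype(np.float32) / 255.0
    return frame[np.newaxis, ...]  # shape (1, H, W, C)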
import retro                        # Retro Environment
from skimage import transform       # Help us to preprocess the frames
from skimage.color import rgb2gray  # Help us to gray our frames
from scipy.special import softmax
import matplotlib.pyplot as plt     # Display graphs
from collections import deque       # Ordered collection with ends
import random

from src.rl.space_invaders.space_inv_src.NNet import *
from src.rl.space_invaders.space_inv_src.utils import *

env = retro.make(game="SpaceInvaders-Atari2600")

print("The size of our frame is: ", env.observation_space)
print("The action size is : ", env.action_space.n)

# Here we create a one-hot encoded version of our actions
# possible_actions = [[1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0], ...]
possible_actions = np.array(
    np.identity(env.action_space.n, dtype=int).tolist())

normalized_frame_size = (110, 84)
stack_size = 4
stacked_frames = deque(
    [np.zeros(normalized_frame_size, dtype=int) for i in range(stack_size)],
    maxlen=stack_size)
cbs = env_callbacks(summary_writer, args.environment, interval)
# end logging stuff

from aegis_core.flask_controller import FlaskController
from aegis_core.env_engine import EnvEngine

log = logging.getLogger('werkzeug')
log.setLevel(logging.ERROR)

matplotlib.use("Agg")  # threading issue

print("Creating {}...".format(args.environment))

obs_type = retro.Observations[args.obs_type.upper()]
action_type = retro.Actions[args.action_type.upper()]
env = retro.make(args.environment,
                 obs_type=obs_type,
                 use_restricted_actions=action_type)
end_reward = args.end_reward

print("Observation space:", env.observation_space)
print("Action space:", env.action_space)
# print(env.observation_space.low, env.observation_space.high)
# both RAM and image observations are in the range 0-255
obs_scale = lambda x: x / 255.

engine = EnvEngine(env, end_reward, action_url=args.url,
                   run_name=args.name, reward_proxy=args.proxy,
@author: Parth
"""
import retro

# import policy, model and environment vectoriser from stable baselines
from stable_baselines.common.policies import CnnPolicy
from stable_baselines import A2C
from stable_baselines.common.vec_env import DummyVecEnv

# define some hyperparameters
params = {"learning_rate": 1e-3, "gamma": 0.99}

# initialise the environment
env = retro.make('Airstriker-Genesis')

# create the agent
agent = A2C(CnnPolicy, DummyVecEnv([lambda: env]), **params)

# train the agent for the given number of timesteps
agent.learn(total_timesteps=1000)

# once the agent is trained, run it in a loop
obs, state, dones = env.reset(), None, [False]
while True:
    actions, state = agent.predict(obs, state=state, mask=dones)
    obs, rew, done, info = env.step(actions)
    env.render()
parser.add_argument(
    '--verbose', '-v', action='count', default=1,
    help='increase verbosity (can be specified multiple times)')
parser.add_argument(
    '--quiet', '-q', action='count', default=0,
    help='decrease verbosity (can be specified multiple times)')
args = parser.parse_args()

env = retro.make(args.game, args.state or retro.State.DEFAULT,
                 scenario=args.scenario, record=args.record)
verbosity = args.verbose - args.quiet

# if training proceeds slowly, try varying the parameters here
act = deepq.learn(env,
                  q_func=model,
                  lr=1e-3,
                  max_timesteps=100000,
                  buffer_size=50000,
                  exploration_fraction=0.1,
                  exploration_final_eps=0.02,
                  print_freq=10,
                  callback=callback)
def __init__(self,
             game='Gradius-Nes',
             state='Level1',
             scenario=None,
             info=None,
             render=True,
             fps=50,
             recorder=None,
             reader=None):
    self.action_spectrum = [
        [0, 0, 0, 0, 0, 0, 0, 0, 0],  # no-op
        [0, 0, 0, 0, 1, 0, 0, 0, 0],  # up
        [0, 0, 0, 0, 1, 0, 1, 0, 0],  # up left
        [0, 0, 0, 0, 1, 0, 0, 1, 0],  # up right
        [0, 0, 0, 0, 0, 1, 0, 0, 0],  # down
        [0, 0, 0, 0, 0, 1, 1, 0, 0],  # down left
        [0, 0, 0, 0, 0, 1, 0, 1, 0],  # down right
        [0, 0, 0, 0, 0, 0, 1, 0, 0],  # left
        [0, 0, 0, 0, 0, 0, 0, 1, 0],  # right
        [0, 0, 0, 0, 0, 0, 0, 0, 1],  # shoot
        [0, 0, 0, 0, 1, 0, 0, 0, 1],  # shoot up
        [0, 0, 0, 0, 1, 0, 1, 0, 1],  # shoot up left
        [0, 0, 0, 0, 1, 0, 0, 1, 1],  # shoot up right
        [0, 0, 0, 0, 0, 1, 0, 0, 1],  # shoot down
        [0, 0, 0, 0, 0, 1, 1, 0, 1],  # shoot down left
        [0, 0, 0, 0, 0, 1, 0, 1, 1],  # shoot down right
        [0, 0, 0, 0, 0, 0, 1, 0, 1],  # shoot left
        [0, 0, 0, 0, 0, 0, 0, 1, 1],  # shoot right
        [1, 0, 0, 0, 0, 0, 0, 0, 0],  # opt
        [1, 0, 0, 0, 1, 0, 0, 0, 0],  # opt up
        [1, 0, 0, 0, 1, 0, 1, 0, 0],  # opt up left
        [1, 0, 0, 0, 1, 0, 0, 1, 0],  # opt up right
        [1, 0, 0, 0, 0, 1, 0, 0, 0],  # opt down
        [1, 0, 0, 0, 0, 1, 1, 0, 0],  # opt down left
        [1, 0, 0, 0, 0, 1, 0, 1, 0],  # opt down right
        [1, 0, 0, 0, 0, 0, 1, 0, 0],  # opt left
        [1, 0, 0, 0, 0, 0, 0, 1, 0],  # opt right
        [1, 0, 0, 0, 0, 0, 0, 0, 1],  # opt shoot
        [1, 0, 0, 0, 1, 0, 0, 0, 1],  # opt shoot up
        [1, 0, 0, 0, 1, 0, 1, 0, 1],  # opt shoot up left
        [1, 0, 0, 0, 1, 0, 0, 1, 1],  # opt shoot up right
        [1, 0, 0, 0, 0, 1, 0, 0, 1],  # opt shoot down
        [1, 0, 0, 0, 0, 1, 1, 0, 1],  # opt shoot down left
        [1, 0, 0, 0, 0, 1, 0, 1, 1],  # opt shoot down right
        [1, 0, 0, 0, 0, 0, 1, 0, 1],  # opt shoot left
        [1, 0, 0, 0, 0, 0, 0, 1, 1],  # opt shoot right
    ]
    self.render = render
    self.fps = fps
    self.recorder = recorder
    self.reader = reader
    self.env = retro.make(game=game, state=state, info=info)
    self.env.reset()
    self.user_actions = [0, 0, 0, 0, 0, 0, 0, 0, 0]
    self.end_game = False
    if render:
        self.env.render()
        self.env.unwrapped.viewer.window.on_key_press = self.on_key_press
        self.env.unwrapped.viewer.window.on_key_release = self.on_key_release
import retro
import time
import os
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
from stable_baselines.common import set_global_seeds
from stable_baselines.bench import Monitor
from baselines.common.retro_wrappers import *
from util import log_dir, callback, AirstrikerDiscretizer, CustomRewardAndDoneEnv

# Create the environment (1)
env = retro.make(game='Airstriker-Genesis', state='Level1')
env = AirstrikerDiscretizer(env)  # convert the action space to a discrete space
env = CustomRewardAndDoneEnv(env)  # modify the reward and episode-done handling
env = StochasticFrameSkip(env, n=4, stickprob=0.25)  # sticky frame skip
env = Downsample(env, 2)  # downsampling
env = Rgb2gray(env)  # grayscale
env = FrameStack(env, 4)  # frame stacking
env = ScaledFloatFrame(env)  # normalise observations
env = Monitor(env, log_dir, allow_early_resets=True)
print('Action space: ', env.action_space)
print('Observation space: ', env.observation_space)

# Fix the random seed
env.seed(0)
set_global_seeds(0)

# Create the vectorised environment
env = DummyVecEnv([lambda: env])

# Create the model
parser.add_argument(
    '--obs-type', '-o', default='image', choices=['image', 'ram'],
    help='the observation type, either `image` (default) or `ram`')
parser.add_argument('--players', '-p', type=int, default=1,
                    help='number of players/agents (default: 1)')
args = parser.parse_args()

obs_type = retro.Observations.IMAGE if args.obs_type == 'image' else retro.Observations.RAM

env = retro.make(args.game,
                 args.state or retro.State.DEFAULT,
                 scenario=args.scenario,
                 record=args.record,
                 players=args.players,
                 obs_type=obs_type)
verbosity = args.verbose - args.quiet
try:
    while True:
        ob = env.reset()
        t = 0
        totrew = [0] * args.players
        while True:
            ac = env.action_space.sample()
            ob, rew, done, info = env.step(ac)
            t += 1
            if t % 10 == 0:
                if verbosity > 1:
                    infostr = ''
Render a bk2 file to mp4 (requires ffmpeg):
    python -m retro.scripts.playback_movie winner.bk2

This playback demonstrates that if you stopped the simulation manually for
some condition other than "done" returned from env.step, you will need to do
that as well in the neural network playback; otherwise the simulation will
continue and the neural network will keep taking actions.
"""
import retro        # pip install gym-retro
import numpy as np  # pip install numpy
import cv2          # pip install opencv-python
import neat         # pip install neat-python
import pickle       # pip install cloudpickle

# create retro environment: game, state, scenario (defines rewards)
environment = retro.make('SonicTheHedgehog-Genesis',
                         'GreenHillZone.Act2',
                         scenario='contest',
                         record='.')

# reset environment to initial state
observation = environment.reset()

# configuration for playback from pkl must be the same as execution
config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'config-neat')

# NEAT setup
population = neat.Population(config)
population.add_reporter(neat.StdOutReporter(True))
stats = neat.StatisticsReporter()
population.add_reporter(stats)
import retro
import neat
import cv2
import os
import time
import numpy as np
import pickle
from statistics import mean
from handlers import *

env = retro.make("SonicTheHedgehog3-Genesis")
info(env)

def eval_genomes(genomes, config):
    times_level_finished = 0
    member = 0
    generation_fitness_log = []

    for genome_id, genome in genomes:
        state = env.reset()
        net = neat.nn.recurrent.RecurrentNetwork.create(genome, config)

        temp_state, _, _, _ = env.step(env.action_space.sample())
        WIDTH = int(temp_state.shape[0] / 8)
        HEIGHT = int(temp_state.shape[1] / 8)

        done = False
        current_fitness = 0
        generation_max_fitness = 0
        acts = select_actions(self._root, self._env.action_space,
                              self._max_episode_steps)
        steps, total_rew = rollout(self._env, acts)
        executed_acts = acts[:steps]
        self.node_count += update_tree(self._root, executed_acts, total_rew)
        return executed_acts, total_rew

def brute_retro(
    game,
    max_episode_steps=4500,
    timestep_limit=100_000_000,
    state=retro.State.DEFAULT,
    scenario=None,
):
    env = retro.make(game,
                     state,
                     use_restricted_actions=retro.Actions.DISCRETE,
                     scenario=scenario)
    env = Frameskip(env)
    env = TimeLimit(env, max_episode_steps=max_episode_steps)

    brute = Brute(env, max_episode_steps=max_episode_steps)
    timesteps = 0
    best_rew = float('-inf')
    while True:
        acts, rew = brute.run()
        timesteps += len(acts)

        if rew > best_rew:
            print(f"new best reward {best_rew} => {rew}")
            best_rew = rew
            env.unwrapped.record_movie("best.bk2")
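# NOTE: `brute_retro` assumes a deterministic `Frameskip` wrapper that is not
# defined in this snippet. A minimal sketch matching the one in gym-retro's
# brute-force example:
import gym

class Frameskip(gym.Wrapper):
    """Repeat every action for a fixed number of frames, summing the rewards
    along the way."""
    def __init__(self, env, skip=4):
        super().__init__(env)
        self._skip = skip

    def reset(self):
        return self.env.reset()

    def step(self, act):
        total_rew = 0.0
        done = None
        for _ in range(self._skip):
            obs, rew, done, info = self.env.step(act)
            total_rew += rew
            if done:
                break
        return obs, total_rew, done, info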
env = gym.make('CarRacing-v0')
env.reset()
for _ in range(100):  # run for 100 steps
    # env.render()
    action = env.action_space.sample()  # pick a random action
    env.step(action)  # take the action
    print(action)

import retro
env = retro.make(game='SonicTheHedgehog-Genesis', state='GreenHillZone.Act1')

import gym
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler
        self.action_space = Discrete(2)

    def action(self, action):
        new_action = [0] * 9
        new_action[self.index_right] = 1
        new_action[self.index_b] = 1
        new_action[self.index_a] = action
        return new_action

if __name__ == '__main__':
    monitor = "results/unbiased_random/1"
    action_repeat = True
    single_life = True
    render = None

    env = retro.make("SuperMarioBros-Nes")
    env = MarioDiscretizer(env)
    if single_life:
        env = SingleLifeEnv(env)
    if monitor is not None:
        env = Monitor(env, monitor, video_callable=lambda i: False)
    if render is not None:
        env = AutoRenderer(env, auto_render_period=render)
    if action_repeat:
        env = FrameStack(env, 4)

    # model = WeightedRandomAgent()
    model = RandomAgent(lambda: env.action_space.sample())
    player = BasicRoller(env, model, min_episodes=1)
    # total_rollouts = [player.rollouts() for rollout_i in trange(40)]
    # flat_rollouts = reduce(list.__add__, total_rollouts)
    # total_rewards = map(lambda r: r.total_reward, flat_rollouts)
        checkpoints = [f for f in glob.glob('neat-checkpoint-*')]
        checkpoints = [int(f[16:]) for f in checkpoints]
        checkpoints.sort()
        return neat.Checkpointer.restore_checkpoint(
            'neat-checkpoint-{}'.format(checkpoints[-1]))
    except:
        print('No checkpoints in our folder, starting training from generation 0')
        return neat.Population(config)

if __name__ == "__main__":
    # Create our Ghosts 'n Goblins environment:
    # env = retro.make('GhostsnGoblins-Nes', 'Level1')
    env = retro.make(game='GhostsnGoblins-Nes', record='../records')

    # Load the selected configuration for our NEAT neural network:
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         'config-feedforward')
    '''
    with open('winner.pkl', 'rb') as input_file:
        genome = pickle.load(input_file)
    '''

    # Restore the last checkpoint if one exists, else start from zero:
    p = load_last_checkpoint()
    # Uncomment to restore a specific checkpoint instead of the last one
    # p = neat.Checkpointer.restore_checkpoint('neat-checkpoint-803')
import retro
from stable_baselines.common.policies import CnnLstmPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines import PPO2

n_cpu = 12
env = SubprocVecEnv(
    [lambda: retro.make('Airstriker-Genesis') for i in range(n_cpu)])
model = PPO2.load('ppo2_airstriker', env)

obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
def __init__(self, config: Optional[Config] = None):
    super().__init__()
    global args

    self.config = config

    self.top = 150
    self.left = 150
    self.width = 1100
    self.height = 700

    self.title = 'Super Mario Bros AI'
    self.current_generation = 0
    # This is the true generation 0. If you load individuals then you might
    # end up starting at gen 12, in which case gen 12 would be the true 0.
    self._true_zero_gen = 0

    self._should_display = True
    self._timer = QTimer(self)
    self._timer.timeout.connect(self._update)

    # Keys correspond with B, NULL, SELECT, START, U, D, L, R, A
    # index                0  1     2       3      4  5  6  7  8
    self.keys = np.array([0, 0, 0, 0, 0, 0, 0, 0, 0], np.int8)

    # I only allow U, D, L, R, A, B and those are the indices in which the
    # output will be generated. We need a mapping from the output to the keys above.
    self.ouput_to_keys_map = {
        0: 4,  # U
        1: 5,  # D
        2: 6,  # L
        3: 7,  # R
        4: 8,  # A
        5: 0   # B
    }

    # Initialize the starting population
    individuals: List[Individual] = []

    # Load any individuals listed in args.load_inds
    num_loaded = 0
    if args.load_inds:
        # Overwrite the config file IF one is not specified
        if not self.config:
            try:
                self.config = Config(os.path.join(args.load_file, 'settings.config'))
            except:
                raise Exception(f'settings.config not found under {args.load_file}')

        set_of_inds = set(args.load_inds)

        for ind_name in os.listdir(args.load_file):
            if ind_name.startswith('best_ind_gen'):
                ind_number = int(ind_name[len('best_ind_gen'):])
                if ind_number in set_of_inds:
                    individual = load_mario(args.load_file, ind_name, self.config)
                    # Set debug stuff if needed
                    if args.debug:
                        individual.name = f'm{num_loaded}_loaded'
                        individual.debug = True
                    individuals.append(individual)
                    num_loaded += 1

        # Set the generation
        self.current_generation = max(set_of_inds) + 1  # +1 because it's the next generation
        self._true_zero_gen = self.current_generation

    # Load any individuals listed in args.replay_inds
    if args.replay_inds:
        # Overwrite the config file IF one is not specified
        if not self.config:
            try:
                self.config = Config(os.path.join(args.replay_file, 'settings.config'))
            except:
                raise Exception(f'settings.config not found under {args.replay_file}')

        for ind_gen in args.replay_inds:
            ind_name = f'best_ind_gen{ind_gen}'
            fname = os.path.join(args.replay_file, ind_name)
            if os.path.exists(fname):
                individual = load_mario(args.replay_file, ind_name, self.config)
                # Set debug stuff if needed
                if args.debug:
                    individual.name = f'm_gen{ind_gen}_replay'
                    individual.debug = True
                individuals.append(individual)
            else:
                raise Exception(f'No individual named {ind_name} under {args.replay_file}')
    # If it's not a replay then we need to continue creating individuals
    else:
        num_parents = max(self.config.Selection.num_parents - num_loaded, 0)
        for _ in range(num_parents):
            individual = Mario(self.config)
            # Set debug stuff if needed
            if args.debug:
                individual.name = f'm{num_loaded}'
                individual.debug = True
            individuals.append(individual)
            num_loaded += 1

    self.best_fitness = 0.0
    self._current_individual = 0
    self.population = Population(individuals)

    self.mario = self.population.individuals[self._current_individual]
    self.max_distance = 0  # Track farthest traveled in level
    self.max_fitness = 0.0

    self.env = retro.make(game='SuperMarioBros-Nes',
                          state=f'Level{self.config.Misc.level}')

    # Determine the size of the next generation based off selection type
    self._next_gen_size = None
    if self.config.Selection.selection_type == 'plus':
        self._next_gen_size = self.config.Selection.num_parents + self.config.Selection.num_offspring
    elif self.config.Selection.selection_type == 'comma':
        self._next_gen_size = self.config.Selection.num_offspring

    # If we aren't displaying we need to reset the environment to begin with
    if args.no_display:
        self.env.reset()
    else:
        self.init_window()

        # Set the generation in the label if needed
        if args.load_inds:
            txt = "<font color='red'>" + str(self.current_generation + 1) + '</font>'  # +1 because we switch from 0 to 1 index
            self.info_window.generation.setText(txt)

        # If this is a replay then just set current_individual to be 'replay' and set generation
        if args.replay_file:
            self.info_window.current_individual.setText('Replay')
            txt = f"<font color='red'>{args.replay_inds[self._current_individual] + 1}</font>"
            self.info_window.generation.setText(txt)

        self.show()

    if args.no_display:
        self._timer.start(1000 // 1000)
    else:
        self._timer.start(1000 // 60)
## Run the selected game and state from here
import gym
import retro
from stable_baselines.common.policies import MlpPolicy, CnnPolicy
from ppo2template import PPO2
from brute import TimeLimit
from discretizer import SF2Discretizer

env = retro.make('StreetFighterIISpecialChampionEdition-Genesis',
                 'Champion.Level1.RyuVsGuile.state',
                 obs_type=retro.Observations.IMAGE)  # change to compare IMAGE to RAM observations
env = SF2Discretizer(env)
env = TimeLimit(env, max_episode_steps=2500)

model = PPO2(MlpPolicy, env, n_steps=2500, verbose=2).learn(
    75000)  # put the selected policy and episode steps in here
model.save("ppo2_esf")

# del model  # remove to demonstrate saving and loading
# model = PPO2.load("ppo2_esf")  # load a saved file

# Enjoy trained agent
obs = env.reset()
timesteps = 0
totalrewards = 0.0
# env.unwrapped.record_movie("PPOII.bk2")  # to start saving the recording
while True:
    action, _states = model.predict(obs)
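# NOTE: `SF2Discretizer` is imported from a local `discretizer` module that is
# not shown here. A sketch in the style of gym-retro's discretizer example;
# the concrete button combos chosen below for Street Fighter II are an
# assumption, not the project's actual combo set:
import gym
import numpy as np

class Discretizer(gym.ActionWrapper):
    """Turn a MultiBinary button space into a Discrete space built from a
    fixed list of button combinations."""
    def __init__(self, env, combos):
        super().__init__(env)
        assert isinstance(env.action_space, gym.spaces.MultiBinary)
        buttons = env.unwrapped.buttons
        self._decode_discrete_action = []
        for combo in combos:
            arr = np.array([False] * env.action_space.n)
            for button in combo:
                arr[buttons.index(button)] = True
            self._decode_discrete_action.append(arr)
        self.action_space = gym.spaces.Discrete(len(self._decode_discrete_action))

    def action(self, act):
        return self._decode_discrete_action[act].copy()

class SF2Discretizer(Discretizer):
    """Hypothetical combo set: movement plus the six attack buttons."""
    def __init__(self, env):
        super().__init__(env, combos=[
            ['LEFT'], ['RIGHT'], ['UP'], ['DOWN'],
            ['A'], ['B'], ['C'], ['X'], ['Y'], ['Z'],
        ])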
import retro
import numpy as np
import cv2
import neat
import pickle

env = retro.make('SuperMarioWorld-Snes', 'YoshiIsland2.state', record=True)

imgarray = []
xpos_end = 0

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'config-feedforward')

p = neat.Population(config)
p.add_reporter(neat.StdOutReporter(True))
stats = neat.StatisticsReporter()
p.add_reporter(stats)

with open('winner.pkl', 'rb') as input_file:
    genome = pickle.load(input_file)

ob = env.reset()
ac = env.action_space.sample()

inx, iny, inc = env.observation_space.shape
inx = int(inx / 8)
import retro
import numpy as np
import cv2
import neat
import pickle

# create game environment
env = retro.make('SuperMarioWorld-Snes', 'DonutPlains1')

# create a window to show the gameplay
cv2.namedWindow('SuperMarioWorld-Snes | NEAT-Python | jubatistim', cv2.WINDOW_NORMAL)
cv2.moveWindow("SuperMarioWorld-Snes | NEAT-Python | jubatistim", 950, 120)
cv2.resizeWindow('SuperMarioWorld-Snes | NEAT-Python | jubatistim', 800, 600)

# generation
generation = 9

# function to evaluate genomes during the training process
def eval_genomes(genomes, config):
    # generation
    global generation
    generation += 1

    for genome_id, genome in genomes:
        #######################################################################
        # control variables
        #######################################################################
opponents = ["Raiden", "Jax", "SubZero", "Scorpion", "Baraka"] left_players = available_opponents versions = range(32) counter = 0 num_to_generate = len(difficulties) * len(arenas) * len(opponents) * len( versions) * len(left_players) start = time.time() for difficulty in difficulties: for arena in arenas: for opp in opponents: for p1 in left_players: for version in versions: env = retro.make( game_name, players=2, state=f"{difficulty}_{arena}_{opp}_{version}") env.reset() # Get paths for cursors p1_cords = np.where(fighters_matrix == p1) p1_actions = ["RIGHT", "LEFT"] for _ in range(p1_cords[0][0]): p1_actions.append("DOWN") for _ in range(p1_cords[1][0]): p1_actions.append("RIGHT") p1_actions.append("A") # Execute and save
import retro

movie = retro.Movie('SonicTheHedgehog-Genesis-GreenHillZone.Act1-000000.bk2')
movie.step()

env = retro.make(
    game=movie.get_game(),
    state=None,
    # bk2s can contain any button presses, so allow everything
    use_restricted_actions=retro.Actions.ALL,
    players=movie.players,
)
env.initial_state = movie.get_state()
env.reset()

while movie.step():
    env.render()
    keys = []
    for p in range(movie.players):
        for i in range(env.num_buttons):
            keys.append(movie.get_key(i, p))
    env.step(keys)
import retro
import numpy as np
import cv2
import neat
import pickle
#import os

env = retro.make(game='Airstriker-Genesis')  # load the game

def get_image(inx, iny, ob):
    env.render()
    ob = cv2.resize(ob, (inx, iny))
    ob = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY)
    return ob

def img_flatten(ob):
    imgarray = np.ndarray.flatten(ob)
    return imgarray

def get_action(nnOutput):
    action = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    action[0] = nnOutput[0]  # B
    action[6] = nnOutput[1]  # LEFT
    action[7] = nnOutput[2]  # RIGHT
    return action

ob = env.reset()
import retro
import numpy as np
import cv2
import neat
import pickle

# create game environment
env = retro.make('DonkeyKongCountry-Snes', '1Player.CongoJungle.JungleHijinks.Level1')

# create a window to show the gameplay
cv2.namedWindow('DonkeyKongCountry-Snes | NEAT-Python | jubatistim', cv2.WINDOW_NORMAL)
cv2.moveWindow("DonkeyKongCountry-Snes | NEAT-Python | jubatistim", 950, 120)
cv2.resizeWindow('DonkeyKongCountry-Snes | NEAT-Python | jubatistim', 800, 600)

# generation
generation = -1

# function to evaluate genomes during the training process
def eval_genomes(genomes, config):
    # generation
    global generation
    generation += 1

    for genome_id, genome in genomes:
        #######################################################################
        # control variables
        #######################################################################
'''
==================================================================================
Name        : Altered Beast AI
Author      : Kenshiro
Version     : 1.00
Copyright   : GNU General Public License (GPLv3)
Description : Altered Beast AI based on the T-Rex evolutionary neural network
==================================================================================
'''

# Action space: MultiBinary(12)
# Observation space: Box(224, 320, 3)
# Observation high: 255, low: 0

import retro

env = retro.make(game='AlteredBeast-Genesis')

import math
import array
import sys
import time
import cv2
from ctypes import *

so_file = "/usr/local/lib/libT-Rex.so"
tRex = CDLL(so_file)

# OBSERVATION DATA
SCREEN_ROWS = 224
SCREEN_COLUMNS = 320
SKIPPED_PIXELS = 2
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @File  : mario.py
# @Author: [email protected]
# @Date  : 2019-03-21
# @Desc  :
import retro
import random

env = retro.make(game='SuperMarioBros-Nes')

actions = {
    # index:  0  1  2  3  4  5  6  7  8
    # button: b     n        u  d  l  r  a
    0: [0, 1, 0, 0, 0, 0, 0, 0, 0],  # noop
    1: [0, 0, 0, 0, 0, 0, 1, 0, 0],  # left
    2: [0, 0, 0, 0, 0, 0, 0, 1, 0],  # right
    3: [0, 0, 0, 0, 0, 0, 0, 0, 1],  # a
    4: [0, 0, 0, 0, 0, 0, 0, 1, 1],  # right + a
    5: [1, 0, 0, 0, 0, 0, 0, 1, 0],  # right + b
    6: [1, 0, 0, 0, 0, 0, 0, 1, 1],  # right + a + b
}

def sample():
    return random.randint(0, 6)

if __name__ == '__main__':
    obs = env.reset()
    for i in range(100000):
        act_index = sample()
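        # A plausible continuation of the loop truncated above (assumption:
        # step with the sampled action array and reset when the episode ends):
        obs, rew, done, info = env.step(actions[act_index])
        env.render()
        if done:
            obs = env.reset()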
# In[2]:

#pip install -r requirements.txt

# In[3]:

import retro
import numpy as np
import cv2
import neat
import pickle

# In[4]:

env = retro.make(game='Airstriker-Genesis')
obs = env.reset()
done = False
imgarray = []

# In[17]:

def eval_genomes(genomes, config):
    for genome_id, genome in genomes:
        obs = env.reset()
        inx, iny, inc = env.observation_space.shape
        inx = int(inx / 8)
        iny = int(iny / 8)
def eval_genomes(genomes: list, config: neat.config.Config) -> None:
    """Select the best individuals in a population by assigning each one a
    fitness value that indicates how good it is.

    Args:
        genomes (list): A list containing every individual in the population
            (instances of 'neat.genome.DefaultGenome').
        config (neat.config.Config): Text file with the settings for the
            NEAT-Python library (required), such as the number of inputs and
            outputs, activation functions, population size, etc.
    """
    global is_new_better  # Used to check whether a new best individual was found during training

    env = retro.make(game='SuperMarioWorld-Snes', state='YoshiIsland2', players=1)

    print('\n################### START ###################\n\n')

    # Iterate over every individual in the population to assign each one its fitness
    for genome_id, genome in genomes:
        env.reset()
        net = neat.nn.FeedForwardNetwork.create(genome, config)  # Create the neural network

        penalty = 0   # Penalty given to Mario when he runs into an enemy head-on
        frame = 0     # Counts how many frames have passed, to check whether Mario got stuck in a loop
        count = 0     # Used to check whether Mario is stuck on the x axis (dx = 0 -> count += 1)
        t = 0         # Used to measure the time spent on each level
        done = False  # Signals when this evaluation ends

        print(f'# Individual ID {genome_id} | Population ID {p.generation} #')

        while not done:
            # Get the input vector and Mario's x position
            inputs, mario_x, mario_y = ri.getInputs(ri.getRam(env))
            # Get the network's output for the input above
            output = net.activate(inputs)
            # Threshold to binary values representing the buttons
            action = [0 if bit < 0.5 else 1 for bit in output]
            env.step(action)  # Advance the environment by taking the action
            if display:
                env.render()  # Toggle whether to show Mario's screen
            frame += 1  # One more frame!
            # Get the new input vector and Mario's new x position (used to check whether the agent got stuck)
            new_inputs, new_mario_x, new_mario_y = ri.getInputs(ri.getRam(env))

            # Read the game's RAM:
            ram = env.get_ram()

            # Mario died
            if ram[0x0071] == 9:
                done = True

            # Check whether Mario is stuck on the x axis
            if new_mario_x == mario_x:
                count += 1
            else:
                count = 0
            if count > 100:
                done = True  # If he stayed put on x until count reaches 100, end the evaluation

            # Detect whether Mario got trapped in a loop (e.g., repeatedly going left and right)
            marks = [v for v in range(500, 5500, 500)]  # Useful milestones to check against
            for mark in marks:
                if new_mario_x < mark < frame:
                    done = True

            # Positions 97 and 98 are right in front of Mario; use them to
            # discourage Mario from facing enemies head-on
            if -1 in (new_inputs[97], new_inputs[98]):
                penalty += 5

            # Mario finished the level
            if ram[0x13D9] == 2:
                # Factor time into the best finishers (prefer the fastest)
                fitness = new_mario_x + 1000 * (390 - t) / 390
                done = True
            else:
                # Fitness is the x position Mario reached, minus the penalty
                fitness = new_mario_x - penalty

            # Mario opened a message box (Yoshi or an information block)
            while ram[0x1426] != 0:
                env.step(ut.dec2bin(1))  # Button press that closes the speech bubble
                if display:
                    env.render()
                env.step(ut.dec2bin(0))  # Do nothing (as if pressing and releasing the button)
                if display:
                    env.render()
                new_inputs, new_mario_x, new_mario_y = ri.getInputs(ri.getRam(env))
                ram = env.get_ram()

            # Keep the in-game time
            if ram[0x0F30] == 0:
                t += 1

        # Update the individual's fitness
        genome.fitness = float(fitness)

        # Select the file based on argv
        if len(sys.argv) == 1:
            bgf = best_genome_file
        elif len(sys.argv) == 2 and sys.argv[1] == 'new':
            bgf = ng_best_genome_file

        # If this is the best so far, save it
        if os.path.exists(bgf):
            with open(bgf, 'rb') as file:
                best_genome = pickle.load(file)
            if genome.fitness > best_genome.fitness:
                best_genome = genome
                is_new_better = True
                with open(bgf, 'wb') as file:
                    pickle.dump(genome, file)
        else:
            # If the best-genome file does not exist, create it and save the current genome
            best_genome = genome
            is_new_better = True
            with open(bgf, 'wb') as file:
                pickle.dump(genome, file)

        # Print this individual's stats
        print(f'==================== INFO ====================\n'
              f'-> Generation: {p.generation}\n'
              f'-> Individual ID: {genome_id}\n'
              f'-> Final position: ({new_mario_x}, {new_mario_y})\n'
              f'-> Total penalty: {penalty:.2f}\n'
              f'-> Fitness: {genome.fitness:.2f}\n'
              f'-> Time spent (in game): {t}s\n'
              f'-> Is the best: {genome.fitness == best_genome.fitness}\n'
              f'==============================================\n\n')

    print('\n##################### END ####################\n\n')
    env.render(close=True)
    del env
    return None
def eval_genome(genome, config):
    environment = retro.make('SuperMarioBros-Nes', 'Level1-1', scenario='scenario')

    # reset environment to initial state
    observation = environment.reset()

    # shape/resolution of image created by emulator
    inx, iny, inc = environment.observation_space.shape

    # scale down observation
    inx = int(inx / 8)
    iny = int(iny / 8)

    # create NEAT network (alternative)
    # network = neat.nn.FeedForwardNetwork.create(genome, config)

    # create NEAT network
    network = neat.nn.recurrent.RecurrentNetwork.create(genome, config)

    # set up some variables to track fitness
    current_max_fitness = 0
    fitness = 0
    counter = 0
    finished = False

    while not finished:
        # render the game (be careful!)
        # env.render()

        # resize and reshape the observation image
        observation = cv2.resize(observation, (inx, iny))
        observation = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        observation = np.reshape(observation, (inx, iny))

        # create a single array from 2d pixel data
        img_array = np.ndarray.flatten(observation)

        # create actions from input
        actions = network.activate(img_array)

        # take a peek at actions before translation
        # print(actions)

        # map relu activation output to 0 or 1
        actions = np.where(np.array(actions) <= 0.0, 0.0, 1.0).tolist()

        # take a peek at actions after translation
        # print(actions)

        # increment the emulator state
        observation, reward, done, info = environment.step(actions)

        # update fitness with reward from environment
        fitness += reward

        # allow 250 steps without improvement before ending the rollout
        if fitness > current_max_fitness:
            current_max_fitness = fitness
            counter = 0
        else:
            counter += 1

        if done or counter == 250:
            finished = True

    return current_max_fitness