def __init__(self, movements, max_steps):
    """
    Create a new Simulator.

    The Simulator lets individuals play the game and assigns the
    resulting fitness back to them.

    :param movements: list of movements the individuals are allowed
        to make
    :param max_steps: maximum number of simulation steps an individual
        is allowed to use
    """
    self._log = logging.getLogger('MLProject.Simulator')
    self.movements = movements
    self.max_steps = max_steps
    # TODO: consider a better name than "env_expanded"
    self.env_expanded = gym_super_mario_bros.SuperMarioBrosEnv(
        frames_per_step=1, rom_mode='vanilla')
    self.env = BinarySpaceToDiscreteSpaceEnv(
        self.env_expanded, self.movements)
    # Left disabled on purpose (faster playback experiments):
    # self.env.metadata['video.frames_per_second'] = 120
    # self.env_expanded.metadata['video.frames_per_second'] = 120
from nes_py.wrappers import BinarySpaceToDiscreteSpaceEnv import gym_super_mario_bros import threading, time import matplotlib.pyplot as plt import matplotlib.colors as c import skimage.measure import math import msvcrt import numpy as np import matplotlib.animation as animation from gym_super_mario_bros.actions import SIMPLE_MOVEMENT env_mordi = gym_super_mario_bros.SuperMarioBrosEnv(frames_per_step=1, rom_mode='vanilla') env = BinarySpaceToDiscreteSpaceEnv(env_mordi, SIMPLE_MOVEMENT) input_space = { b'a': 6, b'd': 1, b's': 0, b'w': 5, } key = b's' def yas(array, axis): return array def downscale(env_expanded, state, box_height=10, box_width=(-2, 7), res=1): """
def replay_genome(genome, movements, gen):
    """
    Replay a single genome in the Mario environment, rendering each frame.

    Prints the genome's connection genes, then steps the environment with
    the action the genome's network selects from the downscaled sensor map,
    until the step budget runs out or Mario loses a life.

    :param genome: genome whose ``calculate_action`` picks the next action
    :param movements: action list used to build the discrete action space
    :param gen: generation number (only used by the disabled
        controller-image export below)
    """
    env_expanded = gym_super_mario_bros.SuperMarioBrosEnv(
        frames_per_step=1, rom_mode='vanilla')
    env = BinarySpaceToDiscreteSpaceEnv(env_expanded, movements)

    print('Number of genes: ', len(genome.connection_genes))
    for gene in genome.connection_genes:
        print(gene.in_node, gene.out_node, gene.weight,
              gene.innovation_number, gene.type, gene.enabled)

    done = True
    # Frame-rate bookkeeping; the throttling loop that consumed these is
    # disabled below, so they are currently only incremented.
    unticked = 0
    tick_interval = 1 / 30
    last_tick_time = time.time()
    fps = 0
    frames = 0
    last_fps_time = time.time()

    for _ in range(500000):
        unticked += time.time() - last_tick_time
        last_tick_time = time.time()
        ticked = False
        # while unticked >= tick_interval:
        if done:
            state = env.reset()
        # NOTE(review): original formatting was lost; assuming the sensor
        # map is refreshed every frame (not only after a reset) — confirm.
        state_downscaled = get_sensor_map(env_expanded)
        action = genome.calculate_action(state_downscaled)
        # print('\rFPS: {:.3f}'.format(fps), end=' ')
        # print(vectofixedstr(action, 10), end=' ')
        action = np.argmax(action)
        print('\rtaking action', movements[action], end='', flush=True)
        state, reward, done, info = env.step(action)
        # filename = get_path_of('all_pictures/mario/')
        # imsave(filename + 'mario_' + str(_) + '.png', state)

        # Render the 13x10 sensor map as an RGB image.  dtype must be
        # np.uint8: the np.int alias was removed in NumPy 1.24 and the
        # array holds 0-255 pixel values anyway.
        save_state = np.full((13, 10, 3), 255, dtype=np.uint8)
        COLORS = [[250, 250, 250], [0, 0, 0], [196, 0, 0], [0, 0, 196]]
        for i in range(13):
            for j in range(10):
                if state_downscaled[(i, j)] == -1:   # enemy
                    save_state[(i, j)] = COLORS[3]
                elif state_downscaled[(i, j)] == 0:  # empty
                    save_state[(i, j)] = COLORS[0]
                else:                                # terrain/object
                    save_state[(i, j)] = COLORS[1]
        save_state[(7, 2)] = COLORS[2]  # Mario's fixed position marker
        # filename = get_path_of('all_pictures/input_downscaled/')
        # imsave(filename + 'state_' + str(_) + '.png',
        #        save_state.astype(np.uint8))
        # make_controller(movements[action], _, gen)

        env.render()
        if info["life"] <= 2:
            # NOTE(review): `died` is assigned but never read — kept for
            # behavioral parity with the original.
            died = True
            break
        ticked = True
        frames += 1
        unticked -= tick_interval
        # if ticked:
        #     now = time.time()
        #     if now - last_fps_time >= 1:
        #         fps = frames / (now - last_fps_time)
        #         last_fps_time = now
        #         frames = 0
        # else:
        #     time.sleep(0.001)
    env.close()
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import threading

# Send every log record to stdout with a compact "name: [LEVEL] msg" format.
stdout_log_handler = logging.StreamHandler(sys.stdout)
stdout_log_handler.setFormatter(
    logging.Formatter('%(name)s: [%(levelname)s] %(message)s'))
root_log = logging.getLogger()
root_log.setLevel(logging.DEBUG)
root_log.addHandler(stdout_log_handler)
log = logging.getLogger('MLProject')
gym.logger.setLevel(gym.logger.DEBUG)

# Build the training environment: 4 frames per step, rectangle ROM mode.
# env = gym_super_mario_bros.make('SuperMarioBros-v3')
_env = gym_super_mario_bros.SuperMarioBrosEnv(
    frames_per_step=4, rom_mode='rectangle')
env = BinarySpaceToDiscreteSpaceEnv(_env, right_movements)

# PPO agent over the (20, 11, 3) downscaled observation; small dense net.
agent = PPOAgent(
    states=dict(type='float', shape=(20, 11, 3)),
    actions=dict(type='int', num_actions=len(right_movements)),
    network=[
        dict(type='flatten'),
        dict(type='dense', size=32),
        dict(type='dense', size=64),
        dict(type='dense', size=64),
    ],
    batching_capacity=5,
    step_optimizer=dict(type='adam', learning_rate=0.01))

done = True
# Placeholder observation buffer matching the NES frame size (240x256 RGB).
state = np.empty((240, 256, 3))