Example no. 1
    def play(self):
        env = gym_tetris.make('TetrisA-v0')
        env = JoypadSpace(env, MOVEMENT)
        state = env.reset()
        model = self.global_model
        model_path = os.path.join(self.save_dir,
                                  'model_{}.h5'.format('Tetris'))
        print('Loading model from: {}'.format(model_path))
        model.load_weights(model_path)
        done = False
        step_counter = 0
        reward_sum = 0
        pieza_colocada = True
        informacion = env.get_info()
        antiguo_statistics = informacion['statistics']
        state = [0, 0, 0, 0]
        while not done:
            env.render()
            if pieza_colocada:
                pieza_colocada = False
                pos = 5
                giro = 0
                u = -1
                state = [state]
                policy, value = model(
                    tf.convert_to_tensor(state, dtype=tf.float32))
                policy = tf.nn.softmax(policy)
                action = np.argmax(policy)
                pos_objetivo = action % 10
                giro_objetivo = action // 10
            if giro < giro_objetivo and not done:  # rotate until the target rotation; the original modulo test fails when giro_objetivo is 0
                state, reward, done, info = env.step(1)
                accion = 0
                giro = giro + 1
            elif pos > pos_objetivo and not done:
                state, reward, done, info = env.step(6)
                pos = pos - 1
                accion = 0
            elif pos < pos_objetivo and not done:
                state, reward, done, info = env.step(3)
                pos = pos + 1
                accion = 0
            elif not done and not pieza_colocada:
                state, reward, done, info = env.step(9)
                accion = 9
            else:
                accion = 0
            if not done:
                state, reward, done, info = env.step(accion)
            env.render()
            informacion = env.get_info()
            if antiguo_statistics != informacion['statistics']:
                antiguo_statistics = informacion['statistics']
                step_counter += 1

        env.close()
class Worker(object):
    def __init__(self, genome, config):
        self.genome = genome
        self.config = config

    @property
    def work(self):

        self.env = gym_tetris.make('TetrisA-v0')
        self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)

        self.env.reset()
        action = np.argmax(self.env.action_space.sample())
        ob, _, _, _ = self.env.step(int(action))

        inx = 10
        iny = 20
        done = False

        # net = neat.nn.FeedForwardNetwork.create(self.genome, self.config)
        net = neat.nn.recurrent.RecurrentNetwork.create(
            self.genome, self.config)
        fitness = 0
        xpos = 0
        xpos_max = 16
        counter = 0
        max_score = 0
        moving = 0
        frames = 0

        while not done:
            scaledimg = cv2.cvtColor(ob, cv2.COLOR_BGR2RGB)
            ob = Minimize(ob)
            ob = cv2.resize(ob, (10, 20))

            cv2.imshow('humanwin', scaledimg)
            cv2.waitKey(1)

            imgarray = np.ndarray.flatten(ob)

            actions = net.activate(imgarray)
            action = np.argmax(actions)
            ob, rew, done, info = self.env.step(int(action))

            frames += 1
            if frames == 1200:
                fitness += 1
                frames = 0

        print(
            f"genome:{self.genome.key} Fitnes: {fitness} lines: {info['number_of_lines']}"
        )

        return int(fitness)
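
Minimize is not defined in the snippet above. A minimal sketch, assuming it crops the Tetris playfield out of the raw 240x256 NES frame and converts it to grayscale before the 10x20 resize; the crop coordinates below are rough guesses, not the original author's values.

import cv2

def Minimize(frame):
    # assumed playfield region (rows, columns) of the NES screen
    playfield = frame[47:209, 95:176]
    return cv2.cvtColor(playfield, cv2.COLOR_RGB2GRAY)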
Example no. 3
class Worker(object):
    def __init__(self, genome, config):
        self.genome = genome
        self.config = config
        self.x = 13
        self.y = 15
        self.w = 18
        self.h = 17

    def work(self):
        self.env = gym_super_mario_bros.make('SuperMarioBros-v3')
        self.env = JoypadSpace(self.env, RIGHT_ONLY)
        self.env.reset()
        observation, _, _, _ = self.env.step(self.env.action_space.sample())
        done = False
        net = neat.nn.FeedForwardNetwork.create(self.genome, self.config)
        max_fitness = 0
        fitness = 0
        xpos = 0
        xpos_max = 0
        counter = 0
        while not done:
            observation = observation[self.y * 8:self.y * 8 + self.h * 8,
                                      self.x * 8:self.x * 8 + self.w * 8]

            observation = cv2.resize(observation, (self.w, self.h))
            observation = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
            observation = cv2.resize(observation, (self.w, self.h))

            imgarray = np.ndarray.flatten(observation)
            imgarray = np.interp(imgarray, (0, 254), (-1, +1))
            nnOutput = net.activate(imgarray)

            observation, reward, done, info = self.env.step(
                nnOutput.index(max(nnOutput)))
            fitness += int(reward)
            if fitness > max_fitness:
                max_fitness = fitness
                counter = 0
            else:
                counter += 1
            if done or counter > 350 or info['life'] < 2:
                done = True
                fitness += info['score']
            if info['flag_get']:
                fitness += 100000

        return fitness
Example no. 4
class Mario():
    def __init__(self, img_size=32, stacks=4, skips=4, return_seq=False):

        from nes_py.wrappers import JoypadSpace
        import gym_super_mario_bros
        from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
        # only the 'SuperMarioBros-v2' environment is actually used below
        env = gym_super_mario_bros.make('SuperMarioBros-v2')

        self.env = JoypadSpace(env, SIMPLE_MOVEMENT)
        self.preprocess = Preprocess(img_size, stacks, return_seq)
        self.skips = skips
        self.action_space = self.env.action_space
        self.observation_space = (img_size, img_size, stacks)

    def reset(self):
        self.preprocess.reset()
        s = self.env.reset()
        s = self.preprocess(s)
        return s

    def step(self, a):
        total_r = 0
        for i in range(self.skips):
            self.env.render()

            n_s, r, done, info = self.env.step(a)
            n_s = self.preprocess(n_s)
            total_r += r

            if done: break

        return n_s, total_r, done, info
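
The Preprocess helper used above is not shown. A minimal sketch of the assumed interface: it grayscales and resizes each frame, keeps a rolling stack of the last `stacks` frames, and returns them stacked on the channel axis (or as a sequence when return_seq is set). This is an assumption about the interface, not the original implementation.

import cv2
import numpy as np
from collections import deque

class Preprocess:
    def __init__(self, img_size, stacks, return_seq=False):
        self.img_size = img_size
        self.stacks = stacks
        self.return_seq = return_seq
        self.frames = deque(maxlen=stacks)

    def reset(self):
        self.frames.clear()

    def __call__(self, frame):
        # grayscale, resize and normalise a single RGB frame
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        small = cv2.resize(gray, (self.img_size, self.img_size)) / 255.0
        if not self.frames:
            # pad the history with copies of the first frame after a reset
            for _ in range(self.stacks):
                self.frames.append(small)
        else:
            self.frames.append(small)
        if self.return_seq:
            return np.array(self.frames)           # (stacks, H, W)
        return np.stack(self.frames, axis=-1)      # (H, W, stacks)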
Example no. 5
def play_mario():
    from nes_py.wrappers import JoypadSpace
    import gym_super_mario_bros
    from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

    env = gym_super_mario_bros.make('SuperMarioBros-v3')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)

    env.reset()
    done = False
    step = -1

    while not done:
        step += 1
        time.sleep(1 / 100)
        env.render()

        # print(step)

        action = env.action_space.sample()
        # action = 0
        # if keyboard.is_pressed('a'):
        #     action = 4

        obs, reward, done, info = env.step(action)
        print(obs.shape)
def test_env(env, model, device, deterministic=True):
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    env = RewardScalar(env)
    env = WarpFrame(env)
    env = FrameStack(env, 4)
    env = StochasticFrameSkip(env, 4, 0.5)
    env = ScaledFloatFrame(env)
    # env=gym.wrappers.Monitor(env, 'recording/PPORB5/{}'.format(str(num)), video_callable=lambda episode_id: True, force=True)
    state = env.reset()
    done = False
    total_reward = 0
    distance = []
    print("yes")
    for i in range(2000):
        state = torch.FloatTensor(state).to(device)
        state = state.float()
        state = state.permute(3, 0, 1, 2)
        dist, _ = model(state)
        policy = dist
        policy = Categorical(F.softmax(policy, dim=-1).data.cpu())
        actionLog = policy.sample()
        action = actionLog.numpy()
        next_state, reward, done, info = env.step(action[0])
        distance.append(info['x_pos'])
        state = next_state
        total_reward += reward
        env.render()
        if done:  # stop once the episode ends instead of stepping a finished environment
            break

    print(total_reward)
    print(max(distance))
Example no. 7
class MarioEnvWrapper(GymEnvWrapper, TensorStateMixin):
    max_steps = 10  # TODO: Fix this
    reward_range = (-100, 100)  # TODO: Fix this

    def __init__(self):
        super().__init__()
        self.env = gym_super_mario_bros.make("SuperMarioBros-v0")
        self.env = JoypadSpace(self.env, COMPLEX_MOVEMENT)
        self.history_size = 3
        self.action_repeats = 6

    @timeout_lost
    @step_incrementer
    def step(self, action: int, **kwargs) -> Tuple[Any, Any, bool, dict]:
        for _ in range(self.action_repeats):
            frame, self.reward, self.done, self.info = self.env.step(action)
            self.state = prepare_multi_state(self.state, frame)
            if self.done:
                break
        return self.state, self.reward, self.done, self.info

    @reset_incrementer
    def reset(self):
        frame = self.env.reset()
        self.state = prepare_initial_state(frame, self.history_size)
        self.done = False
        return self.state

    def get_legal_actions(self):
        return list(range(12))
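
prepare_initial_state and prepare_multi_state are not shown above. A minimal sketch, assuming the state is a torch tensor holding the last history_size downscaled grayscale frames; the 42x42 size is an assumption, not the original value.

import numpy as np
import torch
import torch.nn.functional as F

def _frame_to_tensor(frame, size=(42, 42)):
    # RGB (H, W, 3) uint8 frame -> (1, 1, h, w) float tensor in [0, 1]
    gray = np.mean(frame, axis=2, dtype=np.float32) / 255.0
    t = torch.from_numpy(gray)[None, None]
    return F.interpolate(t, size=size, mode='bilinear', align_corners=False)

def prepare_initial_state(frame, history_size=3):
    # repeat the first frame so the history is full from the start
    f = _frame_to_tensor(frame)
    return f.repeat(1, history_size, 1, 1)          # (1, history, h, w)

def prepare_multi_state(state, frame):
    # drop the oldest frame and append the newest one
    f = _frame_to_tensor(frame)
    return torch.cat([state[:, 1:], f], dim=1)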
Example no. 8
def run(file):
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         'config-feedforward')

    genome = pickle.load(open(file, 'rb'))
    #print(genome)
    env = gym_super_mario_bros.make('SuperMarioBros-1-1-v2')
    env = JoypadSpace(env, RIGHT_ONLY)

    env1 = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
    env1 = JoypadSpace(env1, RIGHT_ONLY)


    net = neat.nn.FeedForwardNetwork.create(genome, config)
    try:
        obs = env.reset()
        env1.reset()

        inx = int(obs.shape[0] / 8)
        iny = int(obs.shape[1] / 8)
        done = False
        while not done:
            #env.render()
            env1.render()
            obs = cv2.resize(obs, (inx, iny))
            obs = cv2.cvtColor(obs, cv2.COLOR_BGR2GRAY)
            obs = np.reshape(obs, (inx, iny))

            imgarray = np.ndarray.flatten(obs)

            actions = net.activate(imgarray)
            action =  np.argmax(actions)
            
            _,_,_,info1 = env1.step(action)
            s, reward, done, info = env.step(action)
            xpos = info['x_pos']


            print(done, action, xpos)
            obs = s
        env1.close()
        env.close()
    except KeyboardInterrupt:
        env.close()
        env1.close()
        exit()
Example no. 9
    def fitness_func(self, genome, config, o):
        # create the environment
        game = gym_super_mario_bros.make('SuperMarioBros-v2')
        env = JoypadSpace(game, SIMPLE_MOVEMENT)
        try:
            # reset environment and create network from config file
            state = env.reset()
            neural_net = neat.nn.recurrent.RecurrentNetwork.create(
                genome, config)
            # frame count
            i = 0
            # starting mario position
            start_mario_distance = 40
            done = False

            # get shape of pixels
            inx, iny, inc = env.observation_space.shape
            inx, iny = int(inx / 8), int(iny / 8)

            while not done:
                # env.render() uncomment this to see mario play
                # resize image array and convert to grayscale
                state = cv2.resize(state, (inx, iny))
                state = cv2.cvtColor(state, cv2.COLOR_BGR2GRAY)
                state = np.reshape(state, (inx, iny))
                # flatten array so the network likes it
                state = state.flatten()

                # feed the state through the network and get max output
                output = neural_net.activate(state)
                action = output.index(max(output))

                # do the action from the net
                observation, reward, done, info = env.step(action)
                state = observation
                # increase frame count
                i += 1

                # every 50 frames, check whether Mario has moved; break out to restart if he hasn't
                if i % 50 == 0:
                    if start_mario_distance == info['x_pos']:
                        break
                    else:
                        start_mario_distance = info['x_pos']

            # give a negative reward if mario didn't move else reward the distance he moved
            fitness = -1 if info['x_pos'] <= 40 else info['x_pos']

            # if at the end of the level dump the current genome to file
            if fitness >= 4000:
                pickle.dump(genome, open("winning_genome.pkl", "wb"))

            # put current fitness into queue
            o.put(fitness)
            env.close()

        except KeyboardInterrupt:
            env.close()
            sys.exit()
Example no. 10
def main():
    """
    Main entry point function for program.
    """

    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = JoypadSpace(env, RIGHT_ONLY)

    action_size = len(RIGHT_ONLY)
    cdqn = CDQN(action_size, memory_size=10000, image_shape=(45, 64, 1))

    batch_size = 1024
    games = 10000
    skip = 100
    beaten = False

    for game in range(games):

        print("Game: {}".format(game + 1), end=" ")
        done = True
        total_reward = 0
        for step in range(8000):

            # Preprocess first image
            if done:
                state = env.reset()
                state = preprocess_image(state)[..., tf.newaxis]

            # Play move
            action = cdqn.act(state)
            next_state, reward, done, info = env.step(action)
            total_reward += reward

            # Remember move
            next_state = preprocess_image(next_state)[..., tf.newaxis]
            cdqn.remember(state, action, reward, next_state, done)  # store the per-step reward for this transition
            state = next_state

            # Render game
            env.render()

            if done:
                break

            # Train when there are enough examples in memory
            #if len(cdqn.memory) >= batch_size and step % skip == 0:
        print("Reward: {}".format(total_reward))

        for e in range(5):
            print('Epoch {}'.format(e + 1))
            cdqn.experience_replay(batch_size)

        if game % 10 == 0:
            cdqn.update_target_model()

        print("Reward: {}".format(total_reward))
        tf.saved_model.save(cdqn.network, "model.sav")

    env.close()
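
preprocess_image is not defined above. A minimal sketch, assuming it grayscales the 240x256 RGB frame and shrinks it to the 45x64 resolution the CDQN above was configured with:

import cv2
import numpy as np

def preprocess_image(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    small = cv2.resize(gray, (64, 45), interpolation=cv2.INTER_AREA)
    return small.astype(np.float32) / 255.0        # shape (45, 64), values in [0, 1]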
Example no. 11
    def rank(self):
        for model in self.models:
            env = gym_tetris.make('TetrisA-v0')
            env = JoypadSpace(env, SIMPLE_MOVEMENT)
            env.reset()
            done = False
            info = None

            while not done:
                # Generate all options
                options = [[Action.DROP]]
                for x in range(1, 5):
                    left_option = [Action.LEFT] * x
                    left_option.append(Action.DROP)
                    options.append(left_option)
                    right_option = [Action.RIGHT] * x
                    right_option.append(Action.DROP)
                    options.append(right_option)

                # Enumerate all choices
                boards = []
                for option in options:
                    # Back-up the environment first
                    env.unwrapped._backup()

                    # Run the sequence of actions
                    state = None
                    for action in option:
                        state, _, _, _ = env.step(action.value)

                    # Now, parse the board from the state
                    board = parse_blocks(state)
                    boards.append(board)
                    env.unwrapped._restore()

                # Choose the best option genetically
                choice = model.best(boards)
                for action in options[choice]:
                    _, _, done, info = env.step(action.value)

            model.fitness = info['score']

        self.models = sorted(self.models, key=lambda model: model.fitness)
Example no. 12
    def run_player(self, member):
        env = gym_super_mario_bros.make(self.env)
        env = JoypadSpace(env, self.actions)
        env = WarpFrame(env)
        env = FrameStack(env, 4)
        player = MarioPlayer(self.num_of_actions, member.genes)

        if self.record:
            rec_output_path = os.path.join(
                self.current_gen_output_dir, "vid",
                "{name}.mp4".format(name=member.get_name()))
            rec = monitor.video_recorder.VideoRecorder(env,
                                                       path=rec_output_path)

        state = env.reset()
        done = False

        last_x_pos = 0
        same_x_pos_count = 0

        for step in range(self.steps_scale):
            if done:
                break
            action = player.act(state)
            state, reward, done, info = env.step(action)

            if self.record:
                rec.capture_frame()
            if self.render:
                env.render()

            player.update_info(info)
            player.update_reward(reward)
            if last_x_pos == info['x_pos']:
                same_x_pos_count += 1
            else:
                same_x_pos_count = 0
                last_x_pos = info['x_pos']
            if same_x_pos_count > self.standing_steps_limit:  # end the run if the player doesn't advance
                done = True
            if not self.allow_death and info[
                    'life'] < INITIAL_LIFE:  # death would only repeat, so stop early
                done = True
            if info['flag_get']:  # reaching the flag ends the run
                done = True

        if self.record:
            rec.close()
        env.close()
        member.set_fitness_score(player.calculate_fitness())
        outcome = player.get_run_info()
        outcome['generation'] = self.generation
        outcome['index'] = member.get_name()
        return outcome
Example no. 13
class Worker(object):
    def __init__(self, genome, config):
        self.genome = genome
        self.config = config
        env = gym_super_mario_bros.make('SuperMarioBros-1-1-v2')
        self.env = JoypadSpace(env, RIGHT_ONLY)

    def work(self):

        ob = self.env.reset()

        inx = int(ob.shape[0] / 8)
        iny = int(ob.shape[1] / 8)
        done = False

        net = neat.nn.FeedForwardNetwork.create(self.genome, self.config)

        fitness = 0
        xpos = 0
        xpos_max = 0
        counter = 0
        imgarray = []

        while not done:
            # cv2.namedWindow("main", cv2.WINDOW_NORMAL)
            ob = cv2.resize(ob, (inx, iny))
            ob = cv2.cvtColor(ob, cv2.COLOR_BGR2GRAY)
            ob = np.reshape(ob, (inx, iny))

            imgarray = np.ndarray.flatten(ob)

            #print("Test",self.env.action_space)
            actions = net.activate(imgarray)
            action = np.argmax(actions)

            ob, rew, done, info = self.env.step(action)

            xpos = info['x_pos']

            if xpos > xpos_max:
                xpos_max = xpos
                counter = 0
            else:
                counter += 1

            if counter > 250:
                done = True
            if info['flag_get']:
                print("Finished")
                done = True

        print("Worker Fitness:{}".format(xpos))
        return int(xpos)
Example no. 14
class Player(object):
    def __init__(self, genome, config, record):
        self.genome = genome
        self.config = config
        self.x = 13
        self.y = 15
        self.w = 18
        self.h = 17
        self.record = record

    def play(self):
        self.env = gym_super_mario_bros.make('SuperMarioBros-v3')
        self.env = JoypadSpace(self.env, RIGHT_ONLY)
        self.env.reset()
        observation, _, _, _ = self.env.step(self.env.action_space.sample())
        done = False
        net = neat.nn.FeedForwardNetwork.create(self.genome, self.config)
        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        fourcc2 = cv2.VideoWriter_fourcc(*'XVID')
        mainVideo = cv2.VideoWriter('./Videos/mainWindow.avi', fourcc, 60.0,
                                    (256, 240))
        smallWindow = cv2.VideoWriter('./Videos/smallWindow.avi', fourcc2,
                                      60.0, (self.w * 8, self.h * 8))
        while not done:
            self.env.render()
            frame = observation
            observation = observation[self.y * 8:self.y * 8 + self.h * 8,
                                      self.x * 8:self.x * 8 + self.w * 8]
            if self.record:
                mainVideo.write(frame)
                smallWindow.write(observation)
            cv2.imshow('main', observation)
            observation = cv2.resize(observation, (self.w, self.h))
            observation = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
            observation = cv2.resize(observation, (self.w, self.h))
            imgarray = np.ndarray.flatten(observation)
            imgarray = np.interp(imgarray, (0, 254), (-1, +1))
            nnOutput = net.activate(imgarray)
            observation, reward, done, info = self.env.step(
                nnOutput.index(max(nnOutput)))
Example no. 15
def mario(v, lock):
    env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
    env = JoypadSpace(env, COMPLEX_MOVEMENT)
    done = True
    while True:
        if done:
            env.reset()
            with lock:
                v.value = 0
        with lock:
            u = v.value
        _, _, done, _ = env.step(u)
        env.render()
        sleep(0.01)
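
A minimal sketch of how mario() above could be launched: the shared value v holds the action index that some other process writes, and the lock guards it. This usage is an assumption, not part of the original snippet.

from multiprocessing import Lock, Process, Value

if __name__ == '__main__':
    v = Value('i', 0)            # shared action index, written elsewhere
    lock = Lock()
    worker = Process(target=mario, args=(v, lock))
    worker.start()
    worker.join()                # mario() loops until interrupted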
Example no. 16
class Agent:
    def __init__(self, height, width, env_name='SuperMarioBros-v0'):
        # Create gym environment
        self.env = gym_super_mario_bros.make(env_name)

        # Adding actions to the environment
        self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)

        self.num_actions = self.env.action_space.n
        # Define state as a queue
        self.obs = deque(maxlen=4)
        self.height = height
        self.width = width

        # Initialize state with empty frames
        self.obs.append(np.zeros((height, width)))
        self.obs.append(np.zeros((height, width)))
        self.obs.append(np.zeros((height, width)))
        self.obs.append(np.zeros((height, width)))

        self.env.reset()

    def randomAction(self):
        return random.randint(0, self.num_actions - 1)

    def play(self, act, curr_time, skip_frame=4):

        current_state = self.obs.copy()
        current_state = np.array(current_state)
        current_state = current_state.transpose(1, 2, 0)

        r = 0
        for _ in range(0, skip_frame):
            state, reward, done, info = self.env.step(act)
            r = r + reward
            if done or info['time'] <= 1 or info['time'] > curr_time:
                r = r + (-100)
                done = True
                break
            curr_time = info['time']

        state = resize(Utils.pre_process(state), (self.height, self.width),
                       anti_aliasing=True)

        self.obs.append(state)
        next_state = self.obs.copy()
        next_state = np.array(next_state)
        next_state = next_state.transpose(1, 2, 0)
        return current_state, next_state, r, done, curr_time
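
Utils.pre_process is not shown. A plausible sketch, assuming it just converts the RGB frame to a normalised grayscale array before the skimage resize above:

import numpy as np

class Utils:
    @staticmethod
    def pre_process(frame):
        # RGB (H, W, 3) uint8 -> grayscale float array in [0, 1]
        return np.mean(frame, axis=2) / 255.0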
Example no. 17
def main():

    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)

    done = False

    for e in range(100):
        state = env.reset()
        done = False  # reset each episode; otherwise only the first episode runs

        while not done:
            env.render()
            state, reward, done, info = env.step(env.action_space.sample())

    env.close()
Example no. 18
def contra_game_render():
    env = gym.make('Contra-v0')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    print("actions", env.action_space)
    print("observation_space ", env.observation_space.shape)
    done = False
    env.reset()
    for step in range(5000):
        if done:
            print("Over")
            break
        state, reward, done, info = env.step(env.action_space.sample())
        env.render()

    env.close()
Example no. 19
def main():
    env = gym.make('SuperMarioBros-v0')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)
    obs_shape = env.observation_space.shape
    obs_size = reduce(operator.mul, obs_shape, 1)
    action_size = env.action_space.n

    q = MLP(obs_size, action_size)
    q_target = MLP(obs_size, action_size)
    q_target.load_state_dict(q.state_dict())
    if torch.cuda.is_available():
        q = q.cuda()
        q_target = q_target.cuda()

    memory = ReplayBuffer()
    print_interval = 20
    score = 0.0
    optimizer = optim.Adam(q.parameters(), lr=learning_rate)

    for n_epi in range(10000):
        epsilon = max(0.01, 0.08 - 0.01 *
                      (n_epi / 200))  # Linear annealing from 8% to 1%
        s = env.reset()
        done = False

        while not done:
            a = q.sample_action(torch.from_numpy(np.array(s)).float(), epsilon)
            s_prime, r, done, info = env.step(a)
            done_mask = 0.0 if done else 1.0
            memory.put((s, a, r / 100.0, s_prime, done_mask))
            s = s_prime

            score += r
            if done:
                break

        if memory.size() > 2000:
            train(q, q_target, memory, optimizer)

        if n_epi % print_interval == 0 and n_epi != 0:
            q_target.load_state_dict(q.state_dict())
            print(
                "n_episode :{}, score : {:.1f}, n_buffer : {}, eps : {:.1f}%".
                format(n_epi, score / print_interval, memory.size(),
                       epsilon * 100))
            score = 0.0
    env.close()
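
ReplayBuffer is not shown above. A minimal sketch of the interface the loop relies on (put, size, and a sample method that train() presumably uses), under the assumption that transitions are kept in a bounded deque; the buffer limit is an assumption.

import collections
import random
import numpy as np
import torch

class ReplayBuffer:
    def __init__(self, buffer_limit=50000):
        self.buffer = collections.deque(maxlen=buffer_limit)

    def put(self, transition):
        self.buffer.append(transition)

    def sample(self, n):
        mini_batch = random.sample(self.buffer, n)
        s, a, r, s_prime, done_mask = zip(*mini_batch)
        return (torch.tensor(np.array(s), dtype=torch.float),
                torch.tensor(a).unsqueeze(1),
                torch.tensor(r, dtype=torch.float).unsqueeze(1),
                torch.tensor(np.array(s_prime), dtype=torch.float),
                torch.tensor(done_mask, dtype=torch.float).unsqueeze(1))

    def size(self):
        return len(self.buffer)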
Example no. 20
class agent:
    def __init__(self):
        self.env = gym_super_mario_bros.make('SuperMarioBros-v0')
        self.env = JoypadSpace(self.env, SIMPLE_MOVEMENT)
        self.size = self.env.observation_space.shape
        self.options = self.env.action_space.n
        self.baseline = 0

    def get_screen(self):
        self.env.render()

    def close(self):
        self.env.close()

    def doStep(self, a):
        sP, r, done, info = self.env.step(a)
        return r, done, sP
def gym_SuperMarioBros_env_test():
    """
    `pip install gym-super-mario-bros==7.3.0`
    """
    import gym_super_mario_bros
    from nes_py.wrappers import JoypadSpace

    # Initialize Super Mario environment
    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0")

    # Limit the action-space to
    #   0. walk right
    #   1. jump right
    env = JoypadSpace(env, [["right"], ["right", "A"]])

    env.reset()
    next_state, reward, done, info = env.step(action=0)
    print(f"{next_state.shape},\n {reward},\n {done},\n {info}")
Example no. 22
def play_random_custom(env, steps):
    _NOP = 0

    actions = [['start'], ['NOOP'], ['right', 'A'], ['left', 'A'],
               ['left', 'B'], ['right', 'B'], ['up'], ['down'], ['A'], ['B']]

    env = JoypadSpace(env, actions)

    env.reset()

    action = 0
    start = time.time()
    # play_human
    for t in range(0, steps):
        # get the mapping of keyboard keys to actions in the environment
        if hasattr(env, 'get_keys_to_action'):
            keys_to_action = env.get_keys_to_action()
        elif hasattr(env.unwrapped, 'get_keys_to_action'):
            keys_to_action = env.unwrapped.get_keys_to_action()
        else:
            raise ValueError('env has no get_keys_to_action method')

        # # change action every 6 frames
        if t % 6 == 0:
            action = env.action_space.sample()

            # after 500 timesteps, stop pressing start button
            if t > 500:
                while action == 0:
                    action = env.action_space.sample()

        observation, reward, done, info = env.step(action)
        # print("---------------------------t: ", t)
        # print("action space: ", action, env.action_space)
        # print("obs: ", observation)
        # print("reward: ", reward)
        # print("info: ", info)
        # runs game at about 60fps
        time.sleep(0.016667)
        env.render()

    end = time.time()
    env.close()
    print("time: ", (end - start), " seconds  for ", steps, "steps")
Example no. 23
def main():
    env = gym_super_mario_bros.make('SuperMarioBros-v0')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)

    gamma = 0.9
    epsilon = .95

    trials = 1000
    trial_len = 500

    dqn_agent = DQN(env=env)
    steps = []
    for trial in range(trials):

        cur_state = env.reset()
        cur_state = cur_state.reshape(1, 184320)

        cur_reward = 0
        for step in range(trial_len):

            action = dqn_agent.act(cur_state)
            new_state, reward, done, _ = env.step(action)

            env.render()

            cur_reward += reward

            new_state = new_state.reshape(1, 184320)
            dqn_agent.remember(cur_state, action, reward, new_state, done)

            dqn_agent.replay()
            dqn_agent.target_train()

            cur_state = new_state
            if done:
                break
        if cur_reward <= 199.0:
            print("Failed to complete in trial: " + str(trial) + " reward: " +
                  str(cur_reward))
        else:
            print("Completed in trial: " + str(trail) + " reward: " +
                  str(cur_reward))
            break
def record_one_episode(agent, episode):
    tmp_env = gym_super_mario_bros.make(LEVEL_NAME)
    tmp_env = JoypadSpace(tmp_env, ACTION_SPACE)
    tmp_env = Monitor(tmp_env, './videos/video-episode-{0:05d}'.format(episode), force=True)
    tmp_env = wrapper(tmp_env, FRAME_DIM, FRAME_SKIP)

    state = lazy_frame_to_tensor(tmp_env.reset())

    total_reward = 0
    while True:
        action = agent.get_action(state)

        next_state, reward, done, info = tmp_env.step(action)
        next_state = lazy_frame_to_tensor(next_state)

        if done:
            break

        total_reward += reward
        state = next_state
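
lazy_frame_to_tensor is not defined in the snippet. A minimal sketch, assuming the wrapped env returns a gym LazyFrames stack of shape (H, W, channels) that the agent's network expects as a (1, channels, H, W) float tensor:

import numpy as np
import torch

def lazy_frame_to_tensor(lazy_frames):
    frames = np.array(lazy_frames)                 # materialise the LazyFrames
    tensor = torch.from_numpy(frames).float()
    if tensor.ndim == 3:                           # (H, W, C) -> (C, H, W)
        tensor = tensor.permute(2, 0, 1)
    return tensor.unsqueeze(0)                     # add the batch dimension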
Example no. 25
def run_random_actions():
    """
    randomly take 1 of the 12 complex movement actions
    and print action, rewards
    """
    env = JoypadSpace(gym_super_mario_bros.make('SuperMarioBros-v0'),
                      COMPLEX_MOVEMENT)

    done = True
    for step in range(50):
        if done:
            env.reset()
        # randomly take an action from action_space
        random_action = env.action_space.sample()
        # info returns meta-data incl. coins, life, score etc.
        # state is RGB image (240, 256, 3)
        state, reward, done, info = env.step(random_action)
        print('# {}: Action: {}, Reward: {}, Done: {}'.format(
            step, random_action, reward, done))
    env.close()
Example no. 26
def main():
    env = gym_super_mario_bros.make('SuperMarioBros-v1')
    env = JoypadSpace(env, USE_MOVEMENT)
    interval = 20
    q = QNetWork()
    q_target = QNetWork()
    input_shape = (batch_size, 240, 256, 3)
    q.build(input_shape=input_shape)
    q_target.build(input_shape=input_shape)
    for src, dest in zip(q.variables, q_target.variables):
        dest.assign(src)
    memory = ReplayBuffer()

    score = 0.
    optimizer = optimizers.Adam(lr=learning_rate)
    for n_epi in range(10000):
        epsilon = max(0.01, 0.08 - 0.01 * (n_epi / 200))
        s = env.reset()
        for t in range(10000):
            a = q.sample_action(s, epsilon)
            s_prime, r, done, _ = env.step(a)
            env.render()
            done_mask = 0. if done else 1.
            memory.put((s, a, r, s_prime, done_mask))
            s = s_prime
            score += r
            if done:
                break
        print ("epeide :   {} ".format(n_epi))
        if memory.size() > 100:
            train(q, q_target, memory, optimizer)
        # print("22,  ", tf.size(q), tf.size(q))
        if n_epi % interval == 0 and n_epi != 0:
            # print(q.variables, q_target.variables)
            for src, dest in zip(q.variables, q_target.variables):
                dest.assign(src)  # copy the Q-network weights into the target network
            print(" # of episode {}, avg_score {}, buffer size {}".format(n_epi, score/interval, memory.size()))
            score = 0.
        if n_epi % 200 == 0 and n_epi != 0:
            q_target.network.save_weights('dqn_weights{}.ckpt'.format(int(n_epi / 200)))
    env.close()
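
QNetWork is not defined above. A minimal TensorFlow 2 / Keras sketch of the interface the loop uses (build, variables, a network attribute for save_weights, and an epsilon-greedy sample_action); the layer sizes and the 12-action default are assumptions.

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers

class QNetWork(tf.keras.Model):
    def __init__(self, n_actions=12):
        super().__init__()
        self.n_actions = n_actions
        self.network = tf.keras.Sequential([
            layers.Conv2D(32, 8, strides=4, activation='relu'),
            layers.Conv2D(64, 4, strides=2, activation='relu'),
            layers.Flatten(),
            layers.Dense(256, activation='relu'),
            layers.Dense(n_actions),
        ])

    def call(self, x):
        # scale raw uint8 frames into [0, 1] before the convolutions
        return self.network(tf.cast(x, tf.float32) / 255.0)

    def sample_action(self, state, epsilon):
        # epsilon-greedy choice for a single (240, 256, 3) observation
        if np.random.rand() < epsilon:
            return np.random.randint(self.n_actions)
        q_values = self(np.expand_dims(state, 0))
        return int(tf.argmax(q_values[0]))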
Example no. 27
def record_one_episode(agent):
    tmp_env = gym_super_mario_bros.make(LEVEL_NAME)
    tmp_env = JoypadSpace(tmp_env, ACTION_SPACE)
    tmp_env = Monitor(tmp_env, './video', force=True)
    tmp_env = wrapper(tmp_env, FRAME_DIM)

    state = lazy_frame_to_tensor(tmp_env.reset())

    total_reward = 0
    while True:
        action, _ = agent.select_action_based_on_state(state)

        next_state, reward, done, info = tmp_env.step(action)
        next_state = lazy_frame_to_tensor(next_state)

        if done:
            break

        total_reward += reward

        state = next_state
Example no. 28
def eval_genome(genome):
    env = gym_super_mario_bros.make("SuperMarioBros-1-1-v0")
    env = JoypadSpace(env, COMPLEX_MOVEMENT)

    done = False
    timeout = 100

    state = env.reset()

    rewards = 0

    while not done and timeout > 0:
        state_resized = resize(state,
                               (state.shape[0] // 8, state.shape[1] // 8),
                               anti_aliasing=False)
        state_resized = np.apply_along_axis(
            rgb2dec,
            1,
            (np.reshape(state_resized,
                        (state_resized.shape[0] * state_resized.shape[1], 3)) *
             255),
        )

        state, reward, done, info = env.step(
            np.argmax(genome.evaluate(state_resized)))

        rewards += reward

        if reward <= 0:
            timeout -= 1
        else:
            timeout += 1

        env.render()

    env.close()

    return rewards
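
rgb2dec is not shown. A minimal sketch, assuming it packs an [R, G, B] triple into a single decimal value so each downscaled pixel becomes one network input:

def rgb2dec(rgb):
    # rgb arrives as a length-3 float array scaled back to 0..255
    r, g, b = (int(round(c)) for c in rgb)
    return r * 65536 + g * 256 + b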
Example no. 29
class Mario:
    def __init__(self, *args, **kwargs):
        self._env = gym_super_mario_bros.make(
            kwargs.get('env', 'SuperMarioBros-v0'))
        self._env = JoypadSpace(self._env, SIMPLE_MOVEMENT)
        self._env.reset()

        self._cur_state = None
        self._cur_reward = None

    def perform_move(self, move):
        state, reward, done, info = self._env.step(move)
        self._cur_state = state
        self._cur_reward = reward

        self._env.render()
        return self._cur_state

    def get_cur_reward(self):
        return self._cur_reward

    def get_cur_state(self):
        return self._cur_state
Example no. 30
def play_model(args):

    # if gpu is to be used
    device = torch.device(
        "cuda" if torch.cuda.is_available() and args.ngpu > 0 else "cpu")

    # Build env (first level, right only)
    env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
    env = JoypadSpace(env, SIMPLE_MOVEMENT)

    # setup networks
    init_screen = get_screen(env, device)
    _, _, screen_height, screen_width = init_screen.shape

    # Get number of actions from gym action space
    args.n_actions = env.action_space.n

    target_net = DQN(screen_height, screen_width, args.n_actions).to(device)

    if args.targetNet:
        target_net.load_state_dict(
            torch.load(args.targetNet, map_location=device))

    with torch.no_grad():
        i = 0
        observation = env.reset()
        while i < 5000:
            env.render()
            state = get_screen(env, device)
            action = int(target_net(state).max(1)[1].view(1, 1))
            observation, reward, done, info = env.step(action)

            if done:
                break
            i += 1

    env.close()
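
get_screen is not shown above. A minimal sketch, assuming it grabs the current rendered frame and returns it as a (1, C, H, W) float tensor on the requested device; any downscaling the original may have done is omitted here.

import numpy as np
import torch

def get_screen(env, device):
    frame = env.render(mode='rgb_array')                  # (H, W, 3) uint8
    frame = np.ascontiguousarray(frame, dtype=np.float32) / 255.0
    tensor = torch.from_numpy(frame).permute(2, 0, 1)     # (3, H, W)
    return tensor.unsqueeze(0).to(device)                 # (1, 3, H, W)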