Example No. 1
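# Training-only loop: repeatedly sample a batch (with importance-sampling
# weights) from the replay memory, train the agent, write the TD errors back
# as updated priorities, and periodically sync the target network.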
def main():
    esplison = 0.6
    memorydb_instance = MemoryDB('localhost', 'mario-ai', 'replay-memory')
    agent_instance = Agent((50, 75, 4), False)
    i = 1

    while True:
        print(f"experiences size: {memorydb_instance.get_experiences_size()}")
        sampled_experiences, b_idx, b_ISWeights = memorydb_instance.sample(
            sample_size)
        print("sampled_experiences: ", len(sampled_experiences))
        errors = train_with_experience(agent_instance, sampled_experiences,
                                       b_ISWeights, sample_size, epoch,
                                       discount)

        agent_instance.save_model()
        memorydb_instance.update_batch(b_idx, errors, sampled_experiences)
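        # Every training_before_update_target iterations, copy the online
        # network's weights into the target network.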
        if i % training_before_update_target == 0:
            agent_instance.sync_target()
        i += 1
Example No. 2
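# Full collect-and-train loop: play one episode in the FCEUX emulator, gather
# experiences with an epsilon-greedy policy, add them to the replay memory,
# then train on a sampled batch and decay the exploration rate.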
def main():
    esplison = 0.6
    io_instance = IO("FCEUX 2.2.3: mario")
    memorydb_instance = MemoryDB('localhost', 'mario-ai', 'replay-memory')
    agent_instance = Agent((50, 75, 4), False)

    i = 1
    while True:
        print(f"experiences size: {memorydb_instance.get_experiences_size()}")
        experience_batch = []
        io_instance.focus_window()
        io_instance.reset()
        is_termnial = False

        previous_screenshot = io_instance.get_screenshot()
        previous_device_state = io_instance.get_device_state()
        previous_image_state = io_instance.get_stacked_frames(
            previous_screenshot, True)

        while not is_termnial:
            experience = {}
            experience["terminal"] = False
            experience["screenshot"] = previous_screenshot
            experience["image_state"] = previous_image_state
            experience["device_state"] = previous_device_state

            dice = random.uniform(0.0, 1.0)
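            # Epsilon-greedy: act greedily from the model when dice >= esplison
            # (probability 1 - esplison), otherwise take a random action.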
            action_index = 0
            if dice >= esplison:
                #if True:
                reward = agent_instance.model_predict([
                    experience["image_state"].reshape(1, 50, 75, 4),
                    experience["device_state"].reshape(1, 4)
                ])

                # reward = output[0]
                # print("###value: ", output[2])
                # print("###advantage: ", output[1])
                print("###reward: ", reward)
                action_index = np.argmax(reward).item()
                print("Model selected action and rewards:", action_index,
                      io_instance.action_mapping_name[action_index])
            else:
                action_index = random.randint(0, 3)

                print("Random selected action:", action_index,
                      io_instance.action_mapping_name[action_index])
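            # Send the chosen action to the emulator.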
            io_instance.action(action_index)

            experience["action_index"] = action_index

            # Observe the post-action state; it becomes this experience's next
            # state and the starting state for the next step.
            previous_screenshot = io_instance.get_screenshot()
            previous_image_state = io_instance.get_stacked_frames(
                previous_screenshot, False)
            previous_device_state = io_instance.get_device_state()
            experience["next_screenshot"] = previous_screenshot
            experience["next_image_state"] = previous_image_state
            experience["next_device_state"] = previous_device_state
            experience_batch.append(experience)

            if io_instance.is_termnial(experience["next_screenshot"]):
                experience["terminal"] = True
                is_termnial = True

        calculate_rewards(experience_batch)
        memorydb_instance.add_batch(experience_batch)
        print("Experiences updated")

        sampled_experiences, b_idx, b_ISWeights = memorydb_instance.sample(
            sample_size)
        print("sampled_experiences: ", len(sampled_experiences))

        errors = train_with_experience(agent_instance, sampled_experiences,
                                       b_ISWeights, sample_size, epoch,
                                       discount)

        agent_instance.save_model()
        memorydb_instance.update_batch(b_idx, errors, sampled_experiences)

        esplison -= esplison_decay

        if i % training_before_update_target == 0:
            agent_instance.sync_target()
        i += 1
Example No. 3
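            # Step excerpt: add a bonus reward when the level flag is reached,
            # store the transition, train once enough experiences are banked,
            # and sync the target network at the end of each episode.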
            if info["flag_get"]:
                reward = 15

            # Store the transition as (state, action, reward, done, next_state).
            new_experience = [state, action, reward, done, next_state]
            memorydb_instance.add(new_experience)
            state = next_state

            # Start training only after enough experiences have been collected.
            if memorydb_instance.get_experiences_size() > experiences_before_training:
                sampled_experiences, b_idx, b_ISWeights = memorydb_instance.sample(
                    sample_size)
                errors = agent_instance.train_with_experience(
                    sampled_experiences, b_ISWeights, epoch, gamma)
                agent_instance.save_model()
                memorydb_instance.update_batch(b_idx, errors,
                                               sampled_experiences)

            score += reward

            if done:
                # At the end of every episode, update the target model to match
                # the online model (donkey and carrot); carries over to the next episode.
                agent_instance.sync_target()
                scores.append(score)
                episodes.append(e)
                # 'b' is the format string for the plot (a blue line)
                pylab.plot(episodes, scores, 'b')