    def train(self, training_games_to_play=500):
        training_progress_log = rysy.Log(self.network_path + "progress_training.log")

        if self.agent_type == "curiosity":
            icm_training_progress_log = rysy.Log(self.network_path + "icm_progress_training.log")


        self.training_games_to_play = training_games_to_play
        #process training
        while self.env.get_games_count() < self.training_games_to_play:
            result = self.agent.main()
            if result != 0:
                print("ERROR : agent returned ", result, "\n\n\n\n")
                return result

            #print training progress % and score every 256 iterations
            if self.env.get_iterations()%256 == 0:
                str_progress = str(self.env.get_iterations()) + " "
                str_progress+= str(self.env.get_games_count()) + " "
                str_progress+= str(self.agent.get_epsilon_start()) + " "
                str_progress+= str(self.env.get_score()) + " "

                str_progress+= str(self.env.get_active_env_id()) + " "


                score = self.env.get_envs_score()
                games = self.env.get_envs_games_count()
                for i in range(0, len(score)):
                    str_progress+= str(games[i]) + " "
                    str_progress+= str(score[i]) + " "

                str_progress+= "\n"
                training_progress_log.put_string(str_progress)

                if self.agent_type == "curiosity":
                    str_icm_progress = str(self.env.get_iterations()) + " "
                    str_icm_progress+= str(self.env.get_games_count()) + " "
                    str_icm_progress+= str(self.agent.get_icm_result().inverse_loss) + " "
                    str_icm_progress+= str(self.agent.get_icm_result().forward_loss) + " "
                    str_icm_progress+= str(self.agent.get_icm_result().inverse_classification_success) + " "
                    str_icm_progress+= "\n"
                    icm_training_progress_log.put_string(str_icm_progress)

                print("done = ", self.env.get_games_count()*100.0/self.training_games_to_play, "%", " eps = ", self.agent.get_epsilon_start(), " iterations = ",  self.env.get_iterations(), " score = ",  self.env.get_score(), " active_env = ", self.env.get_active_env_id())

            if self.env.get_iterations()%50000 == 0:
                print("SAVING network")
                self.agent.save(self.network_path)

        self.agent.save(self.network_path)
        return 0
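The log line assembled above is whitespace-separated: iterations, games played, epsilon, cumulative score, the active environment id, and then one (games, score) pair per sub-environment. A minimal stdlib sketch for reading such a file back; the column layout is an assumption inferred from the string built in the loop, and the file name is the one passed to rysy.Log above:

#minimal sketch: parse the progress_training.log written by train() above
#column layout is an assumption taken from the string built in the loop
def read_training_log(path):
    rows = []
    with open(path) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 5:
                continue
            rows.append({
                "iterations": int(fields[0]),
                "games":      int(fields[1]),
                "epsilon":    float(fields[2]),
                "score":      float(fields[3]),
                "active_env": int(fields[4]),
                #trailing fields alternate games_i, score_i per sub-environment
                "envs": [(int(g), float(s))
                         for g, s in zip(fields[5::2], fields[6::2])],
            })
    return rows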
Example #2
    def train(self, training_games_to_play=500):
        training_progress_log = rysy.Log(self.agent_config_path +
                                         "progress_training.log")

        self.training_games_to_play = training_games_to_play
        #process training
        while self.env.get_games_count() < self.training_games_to_play:
            self.agent.main()

            #print training progress % and score every 256 iterations
            if self.env.get_iterations() % 256 == 0:
                str_progress = str(self.env.get_iterations()) + " "
                str_progress += str(self.env.get_games_count()) + " "
                str_progress += str(self.env.get_score()) + " "
                str_progress += "\n"
                training_progress_log.put_string(str_progress)

                print(
                    "done = ",
                    self.env.get_games_count() * 100.0 /
                    self.training_games_to_play, "%", " iterations = ",
                    self.env.get_iterations(), " score = ",
                    self.env.get_score())

            if self.env.get_iterations() % 50000 == 0:
                print("SAVING network")
                self.agent.save(self.agent_config_path)

        self.agent.save(self.agent_config_path)
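The two modulo checks give the loop its cadences: a log line every 256 iterations and a checkpoint every 50000, plus one final save after the loop. A small illustrative helper that factors the pattern out; the Every name is hypothetical and not part of rysy:

#illustrative helper, not part of rysy: true on every n-th tick
class Every:
    def __init__(self, n):
        self.n = n

    def __call__(self, tick):
        return tick % self.n == 0

log_due = Every(256)
save_due = Every(50000)
#inside the loop one would then write:
#    if log_due(self.env.get_iterations()): ...
#    if save_due(self.env.get_iterations()): ...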
Example #3
    def test(self, testing_games_to_play=100):
        testing_progress_log = rysy.Log(self.network_path +
                                        "progress_testing.log")

        self.agent.load(self.network_path)

        #reset score
        self.env.reset_score()

        #choose only the best action
        self.agent.run_best_enable()

        #process testing games
        while self.env.get_games_count() < testing_games_to_play + self.training_games_to_play:
            self.agent.main()

            if self.env.get_iterations() % 256 == 0:
                str_progress = str(self.env.get_iterations()) + " "
                str_progress += str(self.env.get_games_count() -
                                    self.training_games_to_play) + " "
                str_progress += str(self.agent.get_epsilon_training()) + " "
                str_progress += str(self.env.get_score()) + " "
                str_progress += "\n"
                testing_progress_log.put_string(str_progress)

        print("TESTING SCORE =", self.env.get_score())

        return self.env.get_score()
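Note that the loop condition compares the cumulative game counter against testing_games_to_play + self.training_games_to_play, so test() assumes train() already ran in the same process (or that self.training_games_to_play was set by hand); the subtraction when logging turns the cumulative counter back into a testing-game index. A hypothetical call order:

#hypothetical usage, assuming an experiment object exposing both methods
#experiment.train(training_games_to_play=500)         #sets self.training_games_to_play
#score = experiment.test(testing_games_to_play=100)   #plays games 500..600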
Example #4
    def train(self, training_games_to_play=500):
        training_progress_log = rysy.Log(self.network_path +
                                         "progress_training.log")
        icm_training_progress_log = rysy.Log(self.network_path +
                                             "icm_progress_training.log")

        self.training_games_to_play = training_games_to_play
        #process training
        while self.env.get_games_count() < self.training_games_to_play:
            self.agent.main()

            #print training progress % and score every 256 iterations
            if self.env.get_iterations() % 256 == 0:
                str_progress = str(self.env.get_iterations()) + " "
                str_progress += str(self.env.get_games_count()) + " "
                str_progress += str(self.agent.get_epsilon_training()) + " "
                str_progress += str(self.env.get_score()) + " "
                str_progress += "\n"
                training_progress_log.put_string(str_progress)

                print(
                    "done = ",
                    self.env.get_games_count() * 100.0 /
                    self.training_games_to_play, "%", " eps = ",
                    self.agent.get_epsilon_training(), " iterations = ",
                    self.env.get_iterations(), " score = ",
                    self.env.get_score())

                str_icm_progress = str(self.env.get_iterations()) + " "
                str_icm_progress += str(self.env.get_games_count()) + " "
                str_icm_progress += str(
                    self.agent.get_icm_result().inverse_loss) + " "
                str_icm_progress += str(
                    self.agent.get_icm_result().forward_loss) + " "
                str_icm_progress += str(self.agent.get_icm_result().
                                        inverse_classification_success) + " "
                str_icm_progress += "\n"
                icm_training_progress_log.put_string(str_icm_progress)

            if self.env.get_iterations() % 50000 == 0:
                print("SAVING network")
                self.agent.save(self.network_path)

        self.agent.save(self.network_path)
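The ICM log columns are: iterations, games, inverse loss, forward loss, and the inverse model's classification success. Since the raw forward loss (the usual curiosity-reward proxy) is noisy, a hedged stdlib sketch that smooths it with an exponential moving average while reading the log; the column order is taken from the string built above:

#sketch: EMA-smooth the ICM forward loss from icm_progress_training.log
def smoothed_forward_loss(path, alpha=0.05):
    ema = None
    result = []
    with open(path) as f:
        for line in f:
            fields = line.split()
            if len(fields) < 5:
                continue
            forward_loss = float(fields[3])
            ema = forward_loss if ema is None else (1.0 - alpha)*ema + alpha*forward_loss
            result.append((int(fields[0]), ema))
    return result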
Example #5
    def __init__(self):
        env.Env.__init__(self)

        self.width = 4
        self.height = 4
        self.depth = 1
        self.time = 1

        self.actions_count = 4

        self.observation_init()
        self.reset()

        self.gui = gl_gui.GLVisualisation()

        self.max_value = 0.0
        self.max_score_log = rysy.Log("2048_max_score.log")
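The constructor declares a 4x4x1 observation and four actions, matching the four swipe directions of 2048. The encoding itself is not shown here; one plausible mapping, purely as an assumption, is normalized log2 tile values so the observation stays in [0, 1]:

import math

#assumption: tiles are 0, 2, 4, ..., up to 65536, so log2(tile) <= 16
def board_to_observation(board):
    observation = []
    for row in board:
        for tile in row:
            observation.append(0.0 if tile == 0 else math.log2(tile)/16.0)
    return observation  #length 4*4*1 = 16, the shape declared above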
Example #6
    def test(self, testing_games_to_play=100):
        testing_progress_log = rysy.Log(self.network_path + "progress_testing.log")

        self.agent.load(self.network_path + "trained/")

        #reset score
        self.env.reset_score()

        #choose only the best action
        self.agent.run_best_enable()


        #process testing games
        while self.env.get_games_count() < testing_games_to_play + self.training_games_to_play:
            result = self.agent.main()
            if result != 0:
                print("ERROR : agent returned ", result, "\n\n\n\n")
                return result

            if self.env.get_iterations()%256 == 0:

                str_progress = str(self.env.get_iterations()) + " "
                str_progress+= str(self.env.get_games_count() - self.training_games_to_play) + " "
                str_progress+= str(self.agent.get_epsilon_start()) + " "
                str_progress+= str(self.env.get_score()) + " "

                str_progress+= str(self.env.get_active_env_id()) + " "

                score = self.env.get_envs_score()
                games = self.env.get_envs_games_count()
                for i in range(0, len(score)):
                    str_progress+= str(games[i]) + " "
                    str_progress+= str(score[i]) + " "

                str_progress+= "\n"
                testing_progress_log.put_string(str_progress)


        print("TESTING SCORE =", env.get_score())

        return env.get_score()
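Since get_envs_score() and get_envs_games_count() are read here as per-environment cumulative totals, a per-environment average score is an element-wise division. A small sketch under that assumption:

#assumption: score[i] is the cumulative score and games[i] the finished games of env i
def per_env_average(score, games):
    return [s/g if g > 0 else 0.0 for s, g in zip(score, games)]

#usage sketch:
#    per_env_average(self.env.get_envs_score(), self.env.get_envs_games_count())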
Example #7
    def test(self, log_filename_prefix, testing_games_to_play=100):
        self.agent.load(self.agent_config_path)

        #choose only the best action
        self.agent.run_best_enable()

        score = []
        game_id = 0

        #process testing games
        while self.env.get_games_count() < testing_games_to_play + self.training_games_to_play:
            self.agent.main()

            if self.env.get_games_count() != game_id:
                game_id = self.env.get_games_count()
                score.append(self.env.get_score())

                self.env.reset_score()

                print(score)

        mean_score = numpy.mean(score)
        std = numpy.std(score)

        result = "games count : " + str(len(score)) + "\n"
        result += "mean score : " + str(mean_score) + "\n"
        result += "std score : " + str(std) + "\n"

        result += "games : " + "\n"
        for i in range(0, len(score)):
            result += str(score[i]) + "\n"

        testing_progress_log = rysy.Log(self.agent_config_path +
                                        log_filename_prefix +
                                        "result_testing.log")
        testing_progress_log.put_string(result)
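numpy.mean and numpy.std here compute the mean and the population standard deviation (numpy's default is ddof=0). If numpy is not wanted as a dependency, the standard library produces the same numbers; a small alternative sketch with illustrative values:

import statistics

score = [12.0, 15.0, 9.0]             #illustrative values
mean_score = statistics.mean(score)   #same as numpy.mean
std = statistics.pstdev(score)        #population std, matches numpy.std with ddof=0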
Example #8
epsilon_decay       = 0.99999

#init DQN agent
agent = libs.libs_agent.agent_dqn.DQNAgent(env, network_path + "network_config.json", gamma, replay_buffer_size, epsilon_training, epsilon_testing, epsilon_decay)
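
#a hedged side note: with multiplicative decay eps_t = eps_0 * epsilon_decay**t,
#the number of iterations needed to fall from eps_0 to a target eps is
#    t = ln(eps / eps_0) / ln(epsilon_decay)
#e.g. decaying from 1.0 to 0.1 with epsilon_decay = 0.99999 takes
#    math.log(0.1) / math.log(0.99999) ~= 230257 iterations
#(the 1.0 and 0.1 endpoints are illustrative, not values from this script)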

'''
agent.load(network_path + "trained/")

agent.run_best_enable()

while True:
    agent.main()
    env._print()
'''

training_progress_log = rysy.Log(network_path + "progress_training.log")
testing_progress_log = rysy.Log(network_path + "progress_testing.log")

#process training
total_games_to_play = 20000
while env.get_games_count() < total_games_to_play:
    agent.main()

    #print training progress % and score every 256 iterations
    if verbose:
        if env.get_iterations()%256 == 0:
            env._print()
            env.render()


    if env.get_iterations()%256 == 0: