예제 #1
0
                if episode_rewards_sum > 0:
                    suc_count += 1

                print("-----------------------")
                print("Episode: ", epoch)
                print("Reward: ", episode_rewards_sum)
                print("Max reward during train: ", max_reward)
                print("-----------------------")
                epoche_rewards = model.calc_reward(epoche_rewards)
                replBuffer.append(epoche_observations, epoche_actions,
                                  epoche_rewards)

                model.fit(epoche_observations, epoche_actions, epoche_rewards,
                          replBuffer)

                epoche_observations = []
                epoche_actions = []
                epoche_rewards = []

                training_version = load_version + (
                    epochs_count - current_epoch) // save_period

                save_path = "res/{}/{}/LunarLander-v2.ckpt".format(
                    train_model_name, training_version)

                model.save_model(save_path)
                break

            # Save new observation
            state = state_