# End-of-episode bookkeeping (fragment of a training loop: the enclosing
# loop/conditional headers are outside this view, so the original
# indentation levels cannot be fully recovered — see NOTE at the bottom).
if episode_rewards_sum > 0:
    suc_count += 1  # count episodes with positive total reward as successes
print("-----------------------")
print("Episode: ", epoch)
print("Reward: ", episode_rewards_sum)
print("Max reward during train: ", max_reward)
print("-----------------------")
# Post-process the raw per-step rewards into training targets
# (presumably discounted returns — exact semantics live in
# model.calc_reward; confirm there).
epoche_rewards = model.calc_reward(epoche_rewards)
# Store the finished episode in the replay buffer, then train on it.
replBuffer.append(epoche_observations, epoche_actions, epoche_rewards)
model.fit(epoche_observations, epoche_actions, epoche_rewards, replBuffer)
# Reset the per-episode accumulators for the next episode.
epoche_observations = []
epoche_actions = []
epoche_rewards = []
# Periodic checkpoint: the version number advances by one every
# `save_period` epochs, starting from `load_version`.
training_version = load_version + (
    epochs_count - current_epoch) // save_period
save_path = "res/{}/{}/LunarLander-v2.ckpt".format(
    train_model_name, training_version)
model.save_model(save_path)
break  # episode finished — leave the enclosing per-step loop
# Save new observation
# NOTE(review): in the original file this line is presumably dedented to the
# enclosing step-loop level (a statement directly after `break` at the same
# indentation would be unreachable) — confirm against the full source.
state = state_