def __init__(self):
    """Set up the environment, noise sources, networks, optimizers and buffer.

    Runs, in order: environment creation, checkpoint-path computation,
    construction of the two action-noise generators, model creation via
    ``init_models()``, copying of actor/critic weights into the target
    networks, RMSprop optimizer creation, replay-buffer creation,
    initialization of bookkeeping/UI flags, and finally ``log_params()``.

    NOTE(review): ``actor_model``, ``critic_model``, ``target_actor`` and
    ``target_critic`` are presumably created by ``init_models()``, and
    ``CRITIC_LR``, ``ACTOR_LR``, ``GAMMA``, ``BUFFER_SIZE`` and
    ``BATCH_SIZE`` are presumably class-level constants; none of them are
    defined in this method -- confirm against the rest of the class.
    """
    self.env = Environment(Shape.Worm())

    # The 'mlflow' directory sits next to this source file.
    module_dir = os.path.dirname(__file__)
    self.checkpoint_dir = os.path.join(module_dir, 'mlflow')
    self.best_run = 0

    # Both noise generators emit vectors sized to the environment's actions.
    # The attribute name 'addative' (sic) is kept as-is: code outside this
    # method may reference it.
    action_size = self.env.action_size
    self.addative_noise_generator = noise_generators.OUActionNoise(
        output_size=action_size)
    self.multiplier_noise_generator = noise_generators.MarkovSaltPepperNoise(
        output_size=action_size)

    # Models must exist before their weights can be copied below.
    self.init_models()

    # Copy the actor/critic weights into their target counterparts.
    actor_weights = self.actor_model.get_weights()
    self.target_actor.set_weights(actor_weights)
    critic_weights = self.critic_model.get_weights()
    self.target_critic.set_weights(critic_weights)

    # Separate RMSprop optimizers, each with its own learning rate.
    rmsprop = tf.keras.optimizers.RMSprop
    self.critic_optimizer = rmsprop(learning_rate=self.CRITIC_LR)
    self.actor_optimizer = rmsprop(learning_rate=self.ACTOR_LR)

    env = self.env
    self.buffer = PrioritizedBuffer(
        env.state_size,
        env.action_size,
        self.GAMMA,
        self.BUFFER_SIZE,
        self.BATCH_SIZE,
    )

    self.episode_reward_history = []

    # `show` controls the appearance of a window with graphics; it is
    # toggled with 's' and 'a' on the keyboard.
    self.show = False

    # `learn` controls whether the model is learning or not, which affects
    # the FPS of the graphics; it is toggled with 'l' and 'k' on the keyboard.
    self.learn = True

    self.log_params()