def init_sim(self, p):
        """Configure the agent from the parameter dict ``p``.

        Reads the exploration (epsilon) schedule, the discount factor, and
        the physical state bounds, then constructs the Q(s, a) approximator
        selected by ``p["qsa_type"]``.

        Raises:
            KeyError: if a required parameter is missing from ``p``.
            ValueError: if ``p["qsa_type"]`` is not a recognized type.
        """
        self.epsilon = p["epsilon"]
        self.epsilon_decay = p.get("epsilon_decay", 1.0)
        # Floor defaults to the starting epsilon, i.e. effectively no decay floor.
        self.epsilon_min = p.get("epsilon_min", self.epsilon)
        self.gamma = p["gamma"]

        # State vector layout: [angle, velocity, position, angular velocity].
        # The angle spans a full circle; the other dims are symmetric about 0.
        self.vel_bound = p["vel_bound"]
        self.pos_bound = p["pos_bound"]
        self.angle_vel_bound = p["angle_vel_bound"]
        self.state_min = [0.0, -self.vel_bound, -self.pos_bound, -self.angle_vel_bound]
        self.state_max = [2 * math.pi, self.vel_bound, self.pos_bound, self.angle_vel_bound]

        self.episode = 0

        self.num_actions = 3
        qsa_type = p["qsa_type"]
        if qsa_type == "tabular":
            self.state_transformer = null_transformer()
            self.qsa = tabular_qsa()
            self.state_size = [p["angle_bins"], p["angle_vel_bins"], p["pos_bins"], p["vel_bins"]]
            self.qsa.init(self.state_min, self.state_max, self.state_size, self.num_actions)
            self.alpha = p["learning_rate"]
        elif qsa_type == "nnet":
            self.qsa = nnet_qsa()
            self.qsa.init(self.state_min, self.state_max, self.num_actions, p)
            # The neural network has its own internal learning rate (alpha is ignored)
            self.alpha = 1.0
        elif qsa_type == "cartpole_nnet":
            self.qsa = cartpole_nnet_qsa()
            self.qsa.init(self.state_min, self.state_max, self.num_actions, p)
            # The neural network has its own internal learning rate (alpha is ignored)
            self.alpha = 1.0
        elif qsa_type == "cluster_nnet":
            self.qsa = cluster_nnet_qsa()
            self.qsa.init(self.state_min, self.state_max, self.num_actions, p)
            # The neural network has its own internal learning rate (alpha is ignored)
            self.alpha = 1.0
        elif qsa_type == "recurrent_cartpole_nnet":
            self.state_transformer = cartpole_state_transformer()
            self.state_transformer.init(
                p.get("do_trig_transform", False), p.get("create_pomdp", False), p.get("state_dupe_count", 1), p
            )
            print("state dupe count: " + str(p.get("state_dupe_count", 1)))

            self.qsa = recurrent_cartpole_nnet_qsa()
            self.qsa.init(self.state_transformer.num_states, self.num_actions, p)

            # The neural network has its own internal learning rate (alpha is ignored)
            self.alpha = 1.0
        else:
            # Previously an unknown type fell through silently, leaving
            # self.qsa unset and causing a confusing AttributeError later.
            raise ValueError("unknown qsa_type: " + str(qsa_type))
    def init_sim(self, p):
        """Configure the agent from the parameter dict ``p``.

        NOTE(review): ``init_sim`` is defined twice in this file; Python keeps
        only the later definition, so the earlier copy is dead code and one of
        the two should be removed.

        Reads the exploration (epsilon) schedule, the discount factor, and the
        physical state bounds, then builds the Q(s, a) approximator selected
        by ``p['qsa_type']``.

        Raises:
            KeyError: if a required parameter is missing from ``p``.
            ValueError: if ``p['qsa_type']`` is not a recognized type.
        """
        self.epsilon = p['epsilon']
        self.epsilon_decay = p.get('epsilon_decay', 1.0)
        # Floor defaults to the starting epsilon, i.e. effectively no floor.
        self.epsilon_min = p.get('epsilon_min', self.epsilon)
        self.gamma = p['gamma']

        # State vector layout: [angle, velocity, position, angular velocity].
        self.vel_bound = p['vel_bound']
        self.pos_bound = p['pos_bound']
        self.angle_vel_bound = p['angle_vel_bound']
        self.state_min = [0.0, -self.vel_bound, -self.pos_bound, -self.angle_vel_bound]
        self.state_max = [2 * math.pi, self.vel_bound, self.pos_bound, self.angle_vel_bound]

        self.episode = 0

        self.num_actions = 3
        qsa_type = p['qsa_type']
        if qsa_type == 'tabular':
            self.state_transformer = null_transformer()
            self.qsa = tabular_qsa()
            self.state_size = [p['angle_bins'], p['angle_vel_bins'], p['pos_bins'], p['vel_bins']]
            self.qsa.init(self.state_min, self.state_max, self.state_size, self.num_actions)
            self.alpha = p['learning_rate']
        elif qsa_type in ('nnet', 'cartpole_nnet', 'cluster_nnet'):
            # These three variants share the same setup; dispatch on type.
            ctor = {
                'nnet': nnet_qsa,
                'cartpole_nnet': cartpole_nnet_qsa,
                'cluster_nnet': cluster_nnet_qsa,
            }[qsa_type]
            self.qsa = ctor()
            self.qsa.init(self.state_min, self.state_max, self.num_actions, p)
            # The neural network has its own internal learning rate (alpha is ignored)
            self.alpha = 1.0
        elif qsa_type == 'recurrent_cartpole_nnet':
            self.state_transformer = cartpole_state_transformer()
            self.state_transformer.init(p.get('do_trig_transform', False),
                                        p.get('create_pomdp', False),
                                        p.get('state_dupe_count', 1), p)
            print("state dupe count: " + str(p.get('state_dupe_count', 1)))

            self.qsa = recurrent_cartpole_nnet_qsa()
            self.qsa.init(self.state_transformer.num_states, self.num_actions, p)

            # The neural network has its own internal learning rate (alpha is ignored)
            self.alpha = 1.0
        else:
            # Fail fast instead of silently leaving self.qsa unset.
            raise ValueError('unknown qsa_type: ' + str(qsa_type))
    def load_results(self, filename, p):
        """Restore agent hyperparameters and a tabular Q-table from an HDF5 results file.

        NOTE(review): ``load_results`` is defined twice in this file; Python
        keeps only the later definition, so one copy should be removed.

        Args:
            filename: path of the HDF5 results file from a previous run.
            p: parameter dict forwarded to ``load_h5py``.
        """
        self.results = load_h5py(filename, p)

        # Dataset.value was deprecated and removed in h5py 3.0; indexing a
        # scalar dataset with [()] works on both h5py 2.x and 3.x.
        self.epsilon = self.results['epsilon'][()]
        self.epsilon_decay = self.results['epsilon_decay'][()]
        self.epsilon_min = self.results['epsilon_min'][()]
        self.alpha = self.results['alpha'][()]
        self.gamma = self.results['gamma'][()]
        self.state_min = list(self.results['state_min'])
        self.state_max = list(self.results['state_max'])
        self.state_size = list(self.results['state_size'])
        self.episode = self.results['episode'][()]
        self.num_actions = 3
        # Only the tabular representation is serialized, so loading always
        # reconstructs a tabular_qsa regardless of p.
        self.qsa = tabular_qsa()
        self.qsa.init(self.state_min, self.state_max, self.state_size, self.num_actions)
        self.qsa.data = np.array(self.results['qsa_values'])
        print('loaded epsilon: ' + str(self.epsilon))
    def load_results(self, filename, p):
        """Restore agent hyperparameters and a tabular Q-table from an HDF5 results file.

        NOTE(review): ``load_results`` is defined twice in this file; Python
        keeps only the later definition, so one copy should be removed.

        Args:
            filename: path of the HDF5 results file from a previous run.
            p: parameter dict forwarded to ``load_h5py``.
        """
        self.results = load_h5py(filename, p)

        # Dataset.value was deprecated and removed in h5py 3.0; indexing a
        # scalar dataset with [()] works on both h5py 2.x and 3.x.
        self.epsilon = self.results["epsilon"][()]
        self.epsilon_decay = self.results["epsilon_decay"][()]
        self.epsilon_min = self.results["epsilon_min"][()]
        self.alpha = self.results["alpha"][()]
        self.gamma = self.results["gamma"][()]
        self.state_min = list(self.results["state_min"])
        self.state_max = list(self.results["state_max"])
        self.state_size = list(self.results["state_size"])
        self.episode = self.results["episode"][()]
        self.num_actions = 3
        # Only the tabular representation is serialized, so loading always
        # reconstructs a tabular_qsa regardless of p.
        self.qsa = tabular_qsa()
        self.qsa.init(self.state_min, self.state_max, self.state_size, self.num_actions)
        self.qsa.data = np.array(self.results["qsa_values"])
        print("loaded epsilon: " + str(self.epsilon))