def init_sim(self, p):
    """Set up exploration settings, state bounds and the Q(s, a) approximator.

    Args:
        p: Parameter mapping. Always read: "epsilon", "gamma", "vel_bound",
            "pos_bound", "angle_vel_bound" and "qsa_type". Optional:
            "epsilon_decay" (default 1.0) and "epsilon_min" (defaults to the
            initial epsilon). The "tabular" type additionally reads
            "angle_bins", "angle_vel_bins", "pos_bins", "vel_bins" and
            "learning_rate". The "recurrent_cartpole_nnet" type reads the
            optional "do_trig_transform", "create_pomdp" and
            "state_dupe_count". Network-backed types receive the whole
            mapping.

    Raises:
        KeyError: If a required key is missing from ``p``.
        ValueError: If ``p["qsa_type"]`` is not a supported type. Previously
            this case fell through silently and left ``self.qsa`` and
            ``self.alpha`` unset.
    """
    self.epsilon = p["epsilon"]
    self.epsilon_decay = p.get("epsilon_decay", 1.0)
    # Without an explicit floor, epsilon is never allowed below its start.
    self.epsilon_min = p.get("epsilon_min", self.epsilon)
    self.gamma = p["gamma"]

    # Initialize Qsa arbitrarily.
    self.vel_bound = p["vel_bound"]
    self.pos_bound = p["pos_bound"]
    self.angle_vel_bound = p["angle_vel_bound"]
    # NOTE(review): the bounds are ordered (angle, vel, pos, angle_vel) while
    # the tabular bin counts below are ordered (angle, angle_vel, pos, vel).
    # Confirm which ordering matches the simulator's state vector.
    self.state_min = [0.0, -self.vel_bound, -self.pos_bound, -self.angle_vel_bound]
    self.state_max = [2 * math.pi, self.vel_bound, self.pos_bound, self.angle_vel_bound]

    self.episode = 0
    self.num_actions = 3

    qsa_type = p["qsa_type"]
    if qsa_type == "tabular":
        self.state_transformer = null_transformer()
        self.qsa = tabular_qsa()
        self.state_size = [
            p["angle_bins"],
            p["angle_vel_bins"],
            p["pos_bins"],
            p["vel_bins"],
        ]
        self.qsa.init(self.state_min, self.state_max, self.state_size, self.num_actions)
        self.alpha = p["learning_rate"]
    elif qsa_type == "nnet":
        self.qsa = nnet_qsa()
        self.qsa.init(self.state_min, self.state_max, self.num_actions, p)
        # The neural network has its own internal learning rate (alpha is ignored)
        self.alpha = 1.0
    elif qsa_type == "cartpole_nnet":
        self.qsa = cartpole_nnet_qsa()
        self.qsa.init(self.state_min, self.state_max, self.num_actions, p)
        # The neural network has its own internal learning rate (alpha is ignored)
        self.alpha = 1.0
    elif qsa_type == "cluster_nnet":
        self.qsa = cluster_nnet_qsa()
        self.qsa.init(self.state_min, self.state_max, self.num_actions, p)
        # The neural network has its own internal learning rate (alpha is ignored)
        self.alpha = 1.0
    elif qsa_type == "recurrent_cartpole_nnet":
        self.state_transformer = cartpole_state_transformer()
        self.state_transformer.init(
            p.get("do_trig_transform", False),
            p.get("create_pomdp", False),
            p.get("state_dupe_count", 1),
            p,
        )
        print("state dupe count: " + str(p.get("state_dupe_count", 1)))
        self.qsa = recurrent_cartpole_nnet_qsa()
        # The recurrent network is sized from the transformer's state count
        # rather than from the raw state bounds.
        self.qsa.init(self.state_transformer.num_states, self.num_actions, p)
        # The neural network has its own internal learning rate (alpha is ignored)
        self.alpha = 1.0
    else:
        # Fail at the point of misconfiguration instead of leaving the agent
        # without a Q-function.
        raise ValueError(
            "unknown qsa_type %r; expected one of 'tabular', 'nnet', "
            "'cartpole_nnet', 'cluster_nnet', 'recurrent_cartpole_nnet'" % (qsa_type,)
        )
def init_sim(self, p):
    """Configure the agent for a fresh simulation run.

    Copies the exploration/discount settings out of ``p``, derives the state
    bounds, resets the episode counter and builds the Q(s, a) approximator
    selected by ``p['qsa_type']``.

    Args:
        p: Parameter mapping. Required keys: 'epsilon', 'gamma', 'vel_bound',
            'pos_bound', 'angle_vel_bound', 'qsa_type'. Optional keys:
            'epsilon_decay' (default 1.0), 'epsilon_min' (default: the
            initial epsilon). 'tabular' also needs 'angle_bins',
            'angle_vel_bins', 'pos_bins', 'vel_bins' and 'learning_rate';
            'recurrent_cartpole_nnet' optionally reads 'do_trig_transform',
            'create_pomdp' and 'state_dupe_count'. The mapping itself is
            handed to every network-backed approximator.

    Raises:
        KeyError: A required key is absent from ``p``.
        ValueError: ``p['qsa_type']`` names no supported approximator (this
            used to be ignored silently, leaving ``self.qsa`` undefined).
    """
    self.epsilon = p['epsilon']
    self.epsilon_decay = p.get('epsilon_decay', 1.0)
    # Default floor is the starting epsilon itself.
    self.epsilon_min = p.get('epsilon_min', self.epsilon)
    self.gamma = p['gamma']

    # Initialize Qsa arbitrarily.
    self.vel_bound = p['vel_bound']
    self.pos_bound = p['pos_bound']
    self.angle_vel_bound = p['angle_vel_bound']
    # NOTE(review): bounds are listed as (angle, vel, pos, angle_vel) but the
    # tabular bin counts are listed as (angle, angle_vel, pos, vel) -- verify
    # against the state vector produced by the simulator.
    self.state_min = [0.0, -self.vel_bound, -self.pos_bound, -self.angle_vel_bound]
    self.state_max = [2 * math.pi, self.vel_bound, self.pos_bound, self.angle_vel_bound]

    self.episode = 0
    self.num_actions = 3

    def use_bounded_network(network):
        # Shared setup for the networks that are initialised from the raw
        # state bounds. The neural network has its own internal learning
        # rate (alpha is ignored).
        self.qsa = network
        self.qsa.init(self.state_min, self.state_max, self.num_actions, p)
        self.alpha = 1.0

    qsa_type = p['qsa_type']
    if qsa_type == 'tabular':
        self.state_transformer = null_transformer()
        self.qsa = tabular_qsa()
        self.state_size = [p['angle_bins'], p['angle_vel_bins'], p['pos_bins'], p['vel_bins']]
        self.qsa.init(self.state_min, self.state_max, self.state_size, self.num_actions)
        self.alpha = p['learning_rate']
    elif qsa_type == 'nnet':
        use_bounded_network(nnet_qsa())
    elif qsa_type == 'cartpole_nnet':
        use_bounded_network(cartpole_nnet_qsa())
    elif qsa_type == 'cluster_nnet':
        use_bounded_network(cluster_nnet_qsa())
    elif qsa_type == 'recurrent_cartpole_nnet':
        self.state_transformer = cartpole_state_transformer()
        self.state_transformer.init(
            p.get('do_trig_transform', False),
            p.get('create_pomdp', False),
            p.get('state_dupe_count', 1),
            p,
        )
        print("state dupe count: " + str(p.get('state_dupe_count', 1)))
        self.qsa = recurrent_cartpole_nnet_qsa()
        # Sized from the transformer's state count, not the raw bounds.
        self.qsa.init(self.state_transformer.num_states, self.num_actions, p)
        # The neural network has its own internal learning rate (alpha is ignored)
        self.alpha = 1.0
    else:
        # Surface a misconfigured type here rather than as a missing
        # attribute much later.
        raise ValueError(
            "unknown qsa_type %r; expected one of 'tabular', 'nnet', "
            "'cartpole_nnet', 'cluster_nnet', 'recurrent_cartpole_nnet'" % (qsa_type,)
        )
def load_results(self, filename, p):
    """Restore a previously saved tabular agent.

    The container returned by ``load_h5py(filename, p)`` is kept on
    ``self.results`` and the entries 'epsilon', 'epsilon_decay',
    'epsilon_min', 'alpha', 'gamma', 'state_min', 'state_max', 'state_size',
    'episode' and 'qsa_values' are copied onto the agent. A ``tabular_qsa``
    is always rebuilt, whatever approximator produced the file.

    Args:
        filename: Passed through to ``load_h5py``.
        p: Passed through to ``load_h5py``.
    """
    self.results = load_h5py(filename, p)

    # Scalars are read with ``dataset[()]``. ``Dataset.value`` was deprecated
    # in h5py 2.x and removed in h5py 3.0, while ``[()]`` works on both.
    # NOTE(review): assumes load_h5py returns h5py-style datasets -- confirm.
    self.epsilon = self.results['epsilon'][()]
    self.epsilon_decay = self.results['epsilon_decay'][()]
    self.epsilon_min = self.results['epsilon_min'][()]
    self.alpha = self.results['alpha'][()]
    self.gamma = self.results['gamma'][()]
    self.state_min = list(self.results['state_min'])
    self.state_max = list(self.results['state_max'])
    self.state_size = list(self.results['state_size'])
    self.episode = self.results['episode'][()]

    self.num_actions = 3

    self.qsa = tabular_qsa()
    self.qsa.init(self.state_min, self.state_max, self.state_size, self.num_actions)
    # Replace the freshly initialised table with the stored Q-values.
    self.qsa.data = np.array(self.results['qsa_values'])
    print('loaded epsilon: ' + str(self.epsilon))
def load_results(self, filename, p):
    """Reload agent state and the tabular Q-function from a saved results file.

    Stores the object returned by ``load_h5py(filename, p)`` on
    ``self.results``, copies the saved hyper-parameters, state bounds, bin
    sizes and episode counter onto the agent, then builds a new
    ``tabular_qsa`` and overwrites its data with the stored "qsa_values".

    Args:
        filename: Forwarded unchanged to ``load_h5py``.
        p: Forwarded unchanged to ``load_h5py``.
    """
    self.results = load_h5py(filename, p)
    stored = self.results

    # ``Dataset.value`` no longer exists in h5py >= 3.0; indexing with an
    # empty tuple reads the scalar on old and new versions alike.
    # NOTE(review): presumes the entries behave like h5py datasets -- verify
    # against load_h5py.
    for field in ("epsilon", "epsilon_decay", "epsilon_min", "alpha", "gamma", "episode"):
        setattr(self, field, stored[field][()])

    self.state_min = list(stored["state_min"])
    self.state_max = list(stored["state_max"])
    self.state_size = list(stored["state_size"])

    self.num_actions = 3

    # Only the tabular representation is restored here.
    self.qsa = tabular_qsa()
    self.qsa.init(self.state_min, self.state_max, self.state_size, self.num_actions)
    self.qsa.data = np.array(stored["qsa_values"])
    print("loaded epsilon: " + str(self.epsilon))