class SupervisedPredict(SupervisedManager):
    """Inference wrapper around a trained supervised policy model."""

    def __init__(self, experimentname=None):
        super(SupervisedPredict, self).__init__(experimentname)
        self.encoder = DataFormater(nb_timesteps=6)

    def loadModelFromModel(self, model_path, custom_objects=None):
        self.model = load_model(model_path, custom_objects)

    def predict(self, X):
        # Encode the raw input, run the model, then decode the prediction
        # back into drone commands.
        encoded = self.encoder.encode(X)
        results = self.model.predict(encoded)
        results = self.encoder.decode(results)
        return results
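# --- Illustrative usage sketch, not part of the pipeline ---
# How SupervisedPredict might be called end to end. The model path, the
# custom_objects dict and the (1, 6, 22) input shape are assumptions: they
# depend on the trained experiment and on DataFormater's input layout, and
# the custom loss is assumed to be needed here as it is in the RL loader.
if __name__ == "__main__":
    import numpy as np

    predictor = SupervisedPredict(experimentname="demo")
    predictor.loadModelFromModel(
        "graph/demo/model.020.h5",  # assumed path
        custom_objects={"compute": CustomLoss().compute},  # assumed
    )
    X = np.zeros((1, 6, 22))  # one stack of 6 timesteps x 22 features (assumed)
    commands = predictor.predict(X)  # decoded drone commands
    print(commands)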
def augmentData(self):
    out = []
    print("Loading data")
    # Walk consecutive frame pairs: speed is derived from the previous
    # frame, so pairs that span two different runs are skipped.
    for i in tqdm(range(1, len(self.raw_data))):
        prec = self.raw_data[i - 1]
        line = self.raw_data[i]
        if prec["run_id"] != line["run_id"]:
            continue
        subout = DataFormater().rawToData(
            prec,
            line,
            self.nb_gates_nominal,
            self.nb_gates_ir,
            self.nominal_gates_ref,
        )
        if subout["droneSpeed"] is not None:
            out.append(subout)
    return out
class ReinforcementLearning(object):
    """Inference-only loop: replays a trained policy model in the simulator."""

    def __init__(self):
        super(ReinforcementLearning, self).__init__()
        self.__initVars()

    def __initVars(self):
        self.model_predict = self.get_policy_model()
        self.drone_controller = DroneController()
        self.nb_gates_nominal = 2
        self.nb_gates_ir = 1
        self.nominal_gates_ref = self.getNominalGatesRef()
        self.nb_timestep = 6
        self.formater = DataFormater(nb_timesteps=self.nb_timestep)
        self.delta_t = 0.1
        self.nb_features = 22
        self.dev = False
        self.render = True

    def getNominalGatesRef(self):
        with open('../resources/nominal_gate_locations.yaml') as f:
            return yaml.safe_load(f)

    def get_policy_model(self):
        # Load the trained policy, using our 'compute' custom loss.
        compute = CustomLoss().compute
        model_predict = load_model(
            '/home/bleu/Documents/alphapilot/AlphaPilot/route_planner/ftpilot/graph/100_lambda_all_v2/model.020.h5',
            custom_objects={'compute': compute},
        )
        model_predict.summary()
        return model_predict

    def prepro(self, last_observation, observation):
        out = self.formater.sensorToData(
            last_observation,
            observation,
            self.nb_gates_nominal,
            self.nb_gates_ir,
            self.nominal_gates_ref,
        )
        return out

    def pickStates(self, states):
        # Stack nb_timestep states spaced more than delta_t apart, newest
        # last; return None until enough history has been observed.
        out = []
        t = states[-1]['secs']
        out[0:0] = [self.formater.encode(self.formater.format_input(states[-1]))]
        for state in reversed(states[:-1]):
            if len(out) == self.nb_timestep:
                return np.array(out).reshape(1, self.nb_timestep, self.nb_features)
            if t - state['secs'] > self.delta_t:
                out[0:0] = [self.formater.encode(self.formater.format_input(state))]
                t = state['secs']
        return None

    def showData(self, data):
        out = []
        for line in data:
            out[0:0] = [self.formater.format_input(line)]
        out = np.array(out)
        plt.subplot(121)
        plt.plot(out[:, :6])
        plt.gca().legend(('euler x', 'euler y', 'euler z',
                          'euler2 x', 'euler2 y', 'euler2 z'))
        plt.subplot(122)
        plt.plot(out[:, 6:12])
        plt.gca().legend(('dlr', 'dist1', 'dist2',
                          'speed x', 'speed y', 'speed z'))
        plt.show()

    def run(self):
        all_states = []
        # Setting up our environment
        observation = self.drone_controller.reset()
        action = None
        print('go')
        cv2.startWindowThread()
        cv2.namedWindow("preview")
        while True:
            last_observation = observation
            observation, _, done = self.drone_controller.step(action)
            if done:
                # self.showData(all_states)
                return
            new_state = self.prepro(last_observation, observation)
            all_states.append(new_state)
            states = self.pickStates(all_states)
            if states is not None:
                if not self.dev:
                    action = self.model_predict.predict(states)[0]
                else:
                    # Dev mode: bypass the model and round-trip the live
                    # sensor commands through the encoder instead.
                    _, action = self.formater.encode([0.0] * 22, [
                        self.drone_controller.sensors.angular_rates.x,
                        self.drone_controller.sensors.angular_rates.y,
                        self.drone_controller.sensors.angular_rates.z,
                        self.drone_controller.sensors.thrust.z,
                    ])
                if self.render:
                    self.drone_controller.render(states[0][-1], action)
                # Decode the prediction into drone commands for the next step
                action = self.formater.decode(action)
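# --- Standalone illustration of the pickStates() selection rule ---
# pickStates() walks the observation history backwards and keeps only states
# spaced more than delta_t apart, newest last. The sketch below reproduces
# that rule on plain numbers; the timestamps and values are made up.
if __name__ == "__main__":
    def pick_states_demo(states, nb_timestep=3, delta_t=0.1):
        out = [states[-1]["value"]]  # the newest state is always kept
        t = states[-1]["secs"]
        for state in reversed(states[:-1]):
            if len(out) == nb_timestep:
                return out
            if t - state["secs"] > delta_t:
                out.insert(0, state["value"])  # prepend: order stays oldest -> newest
                t = state["secs"]
        return None  # not enough history yet

    history = [{"secs": s, "value": i}
               for i, s in enumerate([0.00, 0.05, 0.12, 0.18, 0.25, 0.31])]
    print(pick_states_demo(history))  # [1, 3, 5]: frames spaced > 0.1 s apart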
class ReinforcementLearning(object):
    """Policy-gradient training loop for the drone racing policy."""

    def __init__(self, params, model=None):
        super(ReinforcementLearning, self).__init__()
        self.__initVars(params, model)

    def __initVars(self, params, model):
        """ Initialize experiment parameters, controllers, model and logging. """
        self.experimentname = params["experimentname"]
        self.metrics = {}
        self.sigmas = params["sigmas"]
        self.gamma = params["gamma"]
        self.sigma_decay = params["sigma_decay"]
        self.drone_controller = DroneController()
        self.simulation_controller = SimulationController()
        self.model = self.__getPolicyModel(model)
        self.directory = self.__setupDir()
        self.nominal_gates_ref = self.__getNominalGatesRef()
        self.callbacks = [
            TensorBoard(
                log_dir=self.directory + "/tensorboard",
                histogram_freq=0,
                write_graph=True,
                write_images=True,
            )
        ]
        self.nb_timestep = 8  # could be read from the model input shape
        self.nb_features = 22  # could be read from the model input shape
        self.nb_gates_nominal = 2
        self.nb_gates_ir = 1
        self.delta_t = 0.1
        self.render = True
        self.max_stack_size = 2000
        self.formater = DataFormater(nb_timesteps=self.nb_timestep)
        if self.sigma_decay:
            self.__initSigmas()

    def __initSigmas(self):
        """ Initialize the per-gate exploration noise and its decay parameters. """
        self.weight_gates = [0 for _ in range(11)]
        self.good_gate = 50
        self.bad_gate = -10
        self.sigma = 2e-2
        self.sigmas = [self.sigma for _ in range(11)]
        self.sigma_gamma = 0.5
        self.sigma_delta = 1.0005
        # Clamp bounds for the gate weights
        self.weight_max = np.ones([11, 1]) * 10000
        self.weight_min = np.ones([11, 1]) * 0

    def __updateSigmas(self):
        """
        Update the per-gate exploration sigmas used during RL training,
        based on `self.weight_gates`.
        """
        for i in range(11):
            self.sigmas[i] = self.sigma / self.sigma_delta ** self.weight_gates[i]

    def __updateWeightsGates(self, id_gate, gate_passed=True):
        """
        Update the weight of the given gate and propagate the change to the
        previous gates, attenuated by `self.sigma_gamma` at each step.
        Calls __updateSigmas() at the end.
        """
        modif = self.good_gate if gate_passed else self.bad_gate
        while 0 <= id_gate < len(self.weight_gates):
            self.weight_gates[id_gate] += modif
            modif = math.floor(modif * self.sigma_gamma)
            if modif == 0:
                break
            id_gate -= 1
        # Clamp the weights to [weight_min, weight_max]
        a = np.array(self.weight_gates)
        a = np.min(np.concatenate((a.reshape(-1, 1), self.weight_max), axis=1), axis=1)
        a = np.max(np.concatenate((a.reshape(-1, 1), self.weight_min), axis=1), axis=1)
        self.weight_gates = a.tolist()
        self.__updateSigmas()

    def __getNominalGatesRef(self):
        """ Load the nominal gate locations from the resources folder. """
        with open("../resources/nominal_gate_locations.yaml") as f:
            return yaml.safe_load(f)

    def __getPolicyModel(self, model_path):
        """ Load the last model, using our 'compute' custom loss. """
        compute = CustomLoss().compute
        model = load_model(model_path, custom_objects={"compute": compute})
        model.summary()
        return model

    def __setupDir(self):
        """
        Create the experiment output directory (with a tensorboard/
        subfolder) and return its path.
""" directory = "./graph/{}_{}".format( self.experimentname, datetime.datetime.now().strftime("%m%d%H%M") ) if not os.path.exists(directory): os.makedirs(directory) if not os.path.exists(directory+"/tensorboard"): os.makedirs(directory+"/tensorboard") return directory def loadChampionData(self, pickle_name="../resources/data_maxime_test.pkl"): x, y, r = [], [], [] with open(pickle_name, "rb") as f: gt = cPickle.load(f) run_curr = -1 r_data = 1 for run, f in gt.idxs_train: if run_curr != run: gt.data_formater.resetFrames() run_curr = run x_data, y_data = gt.data[run][f] x_data, y_data = gt.data_formater.encode(x_data, y_data) x_data = gt.data_formater.stackFrames(x_data) #x_data = x_data.reshape(x_data.shape[1], x_data.shape[2]) x.append(x_data) y.append(y_data) r.append(r_data) self.champion_x, self.champion_y, self.champion_r = np.array(x), np.array(y), np.array(r) return def __prepareTraining(self, run_data): # Order by run reward sum x_train, y_train, r_train = None, None, None for _ in range(15): # DEBUG: STACK BEST x_train, y_train, r_train = self.__stackFrames(x_train, y_train, r_train, run_data) del run_data[-1] if len(run_data) == 0: break self.__diluteFrames() # Mix up data c = list(zip(x_train.tolist(), y_train.tolist(), r_train.tolist())) random.shuffle(c) x_train, y_train, r_train = zip(*c) # Normalize the reward x_train, y_train = np.array(x_train), np.array(y_train) r_train = np.array(r_train) #r_train = (r_train - r_train.min()) / (r_train.max() - r_train.min()) #size = int(x_train.shape[0] / 2) #x_train = np.concatenate((x_train, self.champion_x[:size])) #y_train = np.concatenate((y_train, self.champion_y[:size])) #r_train = np.concatenate((r_train, self.champion_r[:size])) r_train -= np.mean(r_train) r_train /= np.std(r_train) print(r_train.max(), r_train.min()) return x_train, y_train, r_train def __stackFrames(self, x, y, r, data): _, data = data[-1] x_train, y_train, r_train = data if x is None: x, y, r = np.array(x_train), np.array(y_train), np.array(r_train) return x, y, r x = np.concatenate((x, np.array(x_train))) y = np.concatenate((y, np.array(y_train))) r = np.concatenate((r, np.array(r_train))) return x, y, r def __diluteFrames(self): x_stack, y_stack, reward_stack = self.champion_x, self.champion_y, self.champion_r c = list(zip(x_stack.tolist(), y_stack.tolist(), reward_stack.tolist())) random.shuffle(c) x_stack, y_stack, reward_stack = zip(*c) self.champion_x, self.champion_y, self.champion_r = np.array(x_stack), np.array(y_stack), np.array(reward_stack) def __saveMetrics(self, episode_id, run_id, observation): # Display # print("Total reward: {}".format(int(self.reward_sum))) key_episode = str(episode_id % 10) name = key_episode+'_'+str(run_id) self.metrics[name] = { 'reward': self.reward_sum, 'nb_gate': observation['next_true_gate_id'], 'elapsed_time': self.drone_controller.sensors.secs - self.drone_controller.start_time, } def __doneCallbackSigmas(self, observation): if self.sigma_decay: if observation["next_true_gate_id"] == 10: self.__updateWeightsGates(observation["next_true_gate_id"]) else: self.__updateWeightsGates( observation["next_true_gate_id"], gate_passed=False ) def loadSigmas(self, file_name): """ Load sigma settings. 
""" data = sio.loadmat(file_name) self.weight_gates = data["weight_gates"][0] self.good_gate = data["good_gate"][0][0] self.bad_gate = data["bad_gate"][0][0] self.sigma = data["sigma"][0][0] self.sigmas = data["sigmas"][0] self.sigma_gamma = data["sigma_gamma"][0][0] self.sigma_delta = data["sigma_delta"][0][0] self.weight_max = np.ones([11, 1]) * data["weight_max"][0][0] self.weight_min = np.ones([11, 1]) * data["weight_min"][0][0] def saveSigmas(self, name=""): """ Save sigma settings. """ if self.sigma_decay: data = { "weight_gates": self.weight_gates, "good_gate": self.good_gate, "bad_gate": self.bad_gate, "sigma": self.sigma, "sigmas": self.sigmas, "sigma_gamma": self.sigma_gamma, "sigma_delta": self.sigma_delta, "weight_max": self.weight_max[0], "weight_min": self.weight_min[0], } sio.savemat( file_name="{}/{}_sigmas.mat".format(self.directory.rstrip("/"), name), mdict=data, ) def prepro(self, last_observation, observation): """ Get input vector from raw sensor data: input: - last_observation: sensor data from previous state, to compute speed - observation: sensor data to pre-process output: - out: the model's input vector containing processed data """ out = self.formater.sensorToData( last_observation, observation, self.nb_gates_nominal, self.nb_gates_ir, self.nominal_gates_ref, ) return out def pickStates(self): """ Select states according to the delta_t our lstm is trained on. This is necessary because our step method does not record states with the same frequency as our data recorder. Set self.delta_t to 0 to deactivate. """ out = [] states = self.all_observations t = states[-1]["secs"] out[0:0] = [self.formater.encode(self.formater.format_input(states[-1]))] for state in reversed(states[:-1]): if len(out) == self.nb_timestep: return np.array(out).reshape(1, self.nb_timestep, self.nb_features) if t - state["secs"] > self.delta_t: out[0:0] = [self.formater.encode(self.formater.format_input(state))] t = state["secs"] return None def forwardPass(self, states): """ Perform one forward pass from policy model, on nb_timesteps observations from all_observations. input: - self.all_observations: the list containing all the observed frames output: - action: a decision made on nb_timesteps observations """ action = self.model.predict(states)[0] return action def envReset(self, episode_id): """ Reset environment, and re-initialize variables used in main loop. """ perturbation_id = 2 if episode_id % 10 == 0: # if we have to reset simulation perturbation_id = 2 # perturbation_id = random.randint(0,24) # self.simulation_controller.restart(perturbation_id) # time.sleep(10) # TODO: Change this for process listener # dump data try: print(self.metrics) sio.savemat( file_name="{}/episode{}.mat".format(self.directory.rstrip("/"), episode_id), mdict=self.metrics, ) print("Saving done") except: pass self.metrics = {} self.metrics['map_id'] = perturbation_id def discountRewards(self): """ Take 1D float array of rewards and compute discounted reward. """ r = np.array(self.rewards) discounted_r = np.zeros_like(r) running_add = 0 # we go from last reward to first one so we don't have to do exponentiations for t in reversed(range(0, r.size)): running_add = ( running_add * self.gamma + r[t] ) # the point here is to use Horner's method to compute those rewards efficiently discounted_r[t] = running_add discounted_r += np.array(self.rewards_smooth) return discounted_r def greedyPolicy(self, action): """ Draw a random sample from a gaussian distribution centered at 'action', and with stddeviation 'sigmas'. 
""" # Si gates passed : update les sigmas # sigma / 1.001 ^ nb_run_passed (count du nb de run reussit) # Et on passe au sigma suivant ret = np.random.normal( action, self.sigmas[self.drone_controller.sensors.next_true_gate_id], 4 ) bounds = [[-1, 1], [-1, 1], [-1, 1], [-10, 30]] for i in range(len(ret)): if ret[i] < bounds[i][0]: ret[i] = bounds[i][0] elif ret[i] > bounds[i][1]: ret[i] = bounds[i][1] return np.array(ret) def run(self): """ Main loop used to launch the reinforcement learning. """ # Setting up our environment x_stack, y_stack, reward_stack = None, None, None episode_id = 0 run_id = 0 starting_action = [0, 0, 0, 10] run_data = [] self.max_reward, self.min_reward = 0, 0 self.x_train, self.y_train, self.rewards, self.rewards_smooth = [], [], [], [] self.reward_sum = 0 self.episode_reward, self.last_episode_reward = 0, 0 x_dict, y_dict, r_dict = {}, {}, {} self.envReset(episode_id=episode_id) last_observation = self.drone_controller.reset() observation, _, _ = self.drone_controller.step(None) action = starting_action prec = time.time() self.all_observations = [] print("\n{}Starting episode [{}]{}".format(PURP_DISP, episode_id, RESET_DISP)) print("\n{}Starting run [{}]{}".format(BLUE_DISP, run_id, RESET_DISP)) while True: prec = time.time() # Add current obs to all_obs, make a decision on it # 0.001 curr_state = self.prepro(last_observation, observation) # sigma decay if activated if ( self.sigma_decay and last_observation["next_true_gate_id"] != observation["next_true_gate_id"] ): self.__updateWeightsGates(last_observation["next_true_gate_id"]) self.all_observations.append(curr_state) # Predict action from state, and transform deterministic to stochastic # 0.01 states = self.pickStates() if states is not None: action = self.forwardPass(states) action = self.greedyPolicy(action) action = self.formater.decode(action) # Log the input and label to train later self.x_train.append(states) self.y_train.append(action) # Remember current observation, to compute speed in prepro last_observation = observation # Do one step in the Flightgoggles environment # 0.1 observation, reward, done = self.drone_controller.step(action) if states is not None: self.rewards.append(reward[0]) self.rewards_smooth.append(reward[1]) self.reward_sum += reward[0] if self.render: self.drone_controller.render(states[0][-1], action) if done: # Drone end of run if not len(self.x_train): self.all_observations = [] observation = self.drone_controller.reset() action = None continue # Wait for collision self.drone_controller.waitReset() # Update sigma self.__doneCallbackSigmas(observation) self.__saveMetrics(episode_id, run_id, observation) run_id += 1 self.episode_reward += self.reward_sum run_data.append((observation['next_true_gate_id'], (self.x_train, self.y_train, self.discountRewards()))) #if self.render: # self.drone_controller.renderDiscount(discount_rewards) # give the time to reset the drone position time.sleep(1.0) # Training if run_id % 10 == 0: #x_stack, y_stack, reward_stack, x_train, y_train, r_train = self.__prepareTraining(x_stack, y_stack, reward_stack, run_data) x_train, y_train, r_train = self.__prepareTraining(run_data) self.model.fit( x=np.vstack(x_train.tolist()), y=np.vstack(y_train.tolist()), verbose=1, sample_weight=r_train, epochs=episode_id + 1, initial_epoch=episode_id, batch_size=8, callbacks=self.callbacks, ) # x_stack, y_stack, reward_stack = self.__diluteFrames(x_stack, y_stack, reward_stack) # Save model if self.episode_reward > self.last_episode_reward or episode_id == 0: 
print("Saving weights") self.model.save(self.directory.rstrip('/') + "/model{}_{}.h5".format(int(episode_id), int(self.episode_reward))) self.last_episode_reward = self.episode_reward self.episode_reward = 0 # Save settings of sigma every 10 epochs self.saveSigmas(name=self.experimentname) # Reinitialization self.envReset(episode_id=episode_id) episode_id += 1 print("\n{}Starting episode [{}]{}".format(PURP_DISP, episode_id, RESET_DISP)) run_data = [] if run_id >= 300: exit() # reset after training self.x_train, self.y_train, self.rewards, self.rewards_smooth = [], [], [], [] self.reward_sum = 0 self.all_observations = [] observation = self.drone_controller.reset() action = starting_action print("\n{}Starting run [{}]{}".format(BLUE_DISP, run_id, RESET_DISP))
class GroundTruth(object):
    """Loads, cleans and splits the recorded ground-truth runs."""

    def __init__(self, path=None, json_file=None, yaml_file=None,
                 split=0.1, nb_timesteps=6):
        self.split = split
        # Search for the json and yaml files in path if path is not None,
        # otherwise use json_file and yaml_file directly
        if path:
            path = path.rstrip('/')
            json_file = [file for file in glob("{}/*.json".format(path))]
            yaml_file = [file for file in glob("{}/*.yaml".format(path))]
            if len(json_file) != 1:
                print("No json or more than one file in the specified path.")
                exit(1)
            if len(yaml_file) != 1:
                print("No yaml or more than one file in the specified path.")
                exit(1)
            json_file = json_file[0]
            yaml_file = yaml_file[0]
        self.path = path
        self.json_file = json_file
        self.yaml_file = yaml_file
        self.data_formater = DataFormater(nb_timesteps=nb_timesteps)
        self.data_loader = DataLoader(json_file=json_file, yaml_file=yaml_file,
                                      nb_gates_nominal=2, nb_gates_ir=1)
        data = np.array(self.data_loader.data)
        self.init(data)

    def init(self, data):
        bad_runs = [3, 11, 25, 28, 46, 50, 51, 54, 55, 58, 59, 76, 80]
        # Clean the data and store it in a dictionary keyed by run_id
        self.data = {}
        for frame in data:
            if int(frame['run_id']) in bad_runs:
                continue
            if frame['run_id'] not in self.data:
                self.data[frame['run_id']] = []
            run_id = frame['run_id']
            del frame['run_id']
            self.data[run_id].append(frame)
        self.data = self.__cleanRuns(self.data)
        # Store some information about the dataset
        self.nb_runs = len(self.data)
        self.nb_frames = 0  # TODO: compute this
        # Set the (run, frame) indexes for the input generator
        self.idxs = []
        for idx in self.data.keys():
            self.idxs += [(idx, n) for n in range(len(self.data[idx]))]
        self.idxs = np.array(self.idxs)
        self.__split()

    def __formatFrames(self, raw_data):
        for key, val in raw_data.items():
            data = raw_data[key]  # data is a list with all the frames of the run
            data_array = []
            for frame in data:
                # Input: skip frames with no nominal gate observation
                if not frame['eulerDroneNominal'][0]:
                    continue
                input_array = self.data_formater.format_input(frame)
                # Output
                output_array = self.data_formater.format_output(frame)
                data_array.append([input_array, output_array])
            raw_data[key] = np.array(data_array)
        return raw_data

    def __cleanRuns(self, raw_data):
        raw_data = remove_empty_frames(raw_data)
        raw_data = self.__formatFrames(raw_data)
        return raw_data

    def __split(self):
        self.nb_sample = self.idxs.shape[0]
        size_valid = int(self.split * self.nb_sample)
        size_train = self.nb_sample - size_valid
        self.idxs_train = self.idxs[size_valid:]
        self.idxs_valid = self.idxs[0:size_valid]
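# --- Illustrative usage sketch for GroundTruth ---
# The directory path below is an assumption; it must contain exactly one
# .json and one .yaml recording. With split=0.1, the first 10% of the
# (run, frame) indexes go to validation and the rest to training.
if __name__ == "__main__":
    gt = GroundTruth(path="../resources/recordings", split=0.1, nb_timesteps=6)
    print("runs:", gt.nb_runs)
    print("train samples:", len(gt.idxs_train))
    print("valid samples:", len(gt.idxs_valid))
    # Each index pairs a run id with a frame number inside that run:
    run, frame = gt.idxs_train[0]
    input_array, output_array = gt.data[run][frame]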