def setup(self, path_to_config_file):
    yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(path_to_config_file)

    # Take the checkpoint name and load it
    checkpoint = torch.load(os.path.join(os.sep,
                                         os.path.join(*os.path.realpath(__file__).split(os.sep)[:-2]),
                                         '_logs',
                                         yaml_conf.split(os.sep)[-2],
                                         yaml_conf.split('/')[-1].split('.')[-2],
                                         'checkpoints',
                                         str(checkpoint_number) + '.pth'))

    # Merge the experiment yaml with the global configuration structure
    merge_with_yaml(os.path.join(os.sep,
                                 os.path.join(*os.path.realpath(__file__).split(os.sep)[:-2]),
                                 yaml_conf))

    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    self.first_iter = True
    logging.info("Setup Model")

    # Load the model weights and prepare it for evaluation
    self._model.load_state_dict(checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()

    self.latest_image = None
    self.latest_image_tensor = None

    # We add more time to the curve commands
    self._expand_command_front = 5
    self._expand_command_back = 3
    self.track = 2  # Track.CAMERAS
def __init__(self, checkpoint):
    # experiment_name='None', driver_conf=None, memory_fraction=0.18,
    # image_cut=[115, 510]
    # use_planner=False, graph_file=None, map_file=None, augment_left_right=False, image_cut=[170, 518]
    Agent.__init__(self)

    # This should likely come from global
    # config_gpu = tf.ConfigProto()
    # config_gpu.gpu_options.visible_device_list = '0'
    # config_gpu.gpu_options.per_process_gpu_memory_fraction = memory_fraction
    # self._sess = tf.Session(config=config_gpu)

    # THIS DOES NOT WORK FOR FUSED PLUS LSTM
    # if self._config.number_frames_sequenced > self._config.number_frames_fused:
    #     self._config_train.batch_size = self._config.number_frames_sequenced
    # else:
    #     self._config_train.batch_size = self._config.number_frames_fused
    # self._train_manager = load_system(self._config_train)
    # self._config.train_segmentation = False

    self.model = CoILModel(g_conf.MODEL_NAME)
    self.model.load_state_dict(checkpoint['state_dict'])
    self.model.cuda()
    self.model.eval()
def __init__(self, checkpoint, town_name, carla_version='0.84'):
    # Set the carla version that is going to be used by the interface
    self._carla_version = carla_version
    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    self.first_iter = True

    # Load the model and prepare it for evaluation
    self._model.load_state_dict(checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()

    # This entire segment is for loading models for ensemble evaluation -
    # take care with the paths and checkpoints
    '''
    self.weights = [0.25, 0.25, 0.25, 0.25]  # simple ensemble
    self.model_ids = ['660000', '670000', '1070000', '2640000']  # model checkpoints
    self.models_dir = '/is/sg2/aprakash/Projects/carla_autonomous_driving/code/coiltraine/_logs/ensemble'
    self._ensemble_model_list = []
    for i in range(len(self.model_ids)):
        curr_checkpoint = torch.load(self.models_dir + '/resnet34imnet10S1/checkpoints/' + self.model_ids[i] + '.pth')
        self._ensemble_model_list.append(CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION))
        self._ensemble_model_list[i].load_state_dict(curr_checkpoint['state_dict'])
        self._ensemble_model_list[i].cuda().eval()
    '''

    self.latest_image = None
    self.latest_image_tensor = None

    # For image corruptions
    self.corruption_number = None
    self.severity = None

    if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:  # for evaluating the expert
        self.control_agent = CommandFollower(town_name)
def setup(self, path_to_config_file):
    yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(path_to_config_file)

    # Take the checkpoint name and load it
    checkpoint = torch.load(os.path.join('/',
                                         os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                                         '_logs',
                                         yaml_conf.split('/')[-2],
                                         yaml_conf.split('/')[-1].split('.')[-2],
                                         'checkpoints',
                                         str(checkpoint_number) + '.pth'))

    # Merge the specific agent config with the global config (g_conf)
    merge_with_yaml(os.path.join('/',
                                 os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                                 yaml_conf))

    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
    # TODO: retrain the model with MPSC
    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    self.first_iter = True
    logging.info("Setup Model")

    # Load the model and prepare it for evaluation
    self._model.load_state_dict(checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()

    self.latest_image = None
    self.latest_image_tensor = None

    # We add more time to the curve commands
    self._expand_command_front = 5
    self._expand_command_back = 3

    # Check the map waypoint format => carla_data_provider & http://carla.org/2018/11/16/release-0.9.1/
    # e.g. from map.get_waypoint:
    # Waypoint(Transform(Location(x=338.763, y=226.453, z=0), Rotation(pitch=360, yaw=270.035, roll=0)))
    self.track = Track.ALL_SENSORS_HDMAP_WAYPOINTS  # specify the available track info, see autonomous_agent.py
def __init__(self, checkpoint):
    Agent.__init__(self)
    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
    self.model = CoILModel(g_conf.MODEL_NAME)
    self.model.load_state_dict(checkpoint['state_dict'])
    self.model.cuda()
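# Usage sketch for the constructor above (assumption: it belongs to the CoILAgent
# class and the checkpoint was written by the training loop further below, i.e. a
# dict containing at least a 'state_dict' entry; the path is illustrative only).
import os
import torch

checkpoint = torch.load(os.path.join('_logs', 'eccv', 'experiment_1',
                                     'checkpoints', '200000.pth'))
agent = CoILAgent(checkpoint)  # builds the CoIL model, loads the weights, moves it to the GPU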
def setup(self, path_to_config_file):
    yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(path_to_config_file)

    # Take the checkpoint name and load it
    checkpoint = torch.load(os.path.join('/',
                                         os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                                         '_logs',
                                         yaml_conf.split('/')[-2],
                                         yaml_conf.split('/')[-1].split('.')[-2],
                                         'checkpoints',
                                         str(checkpoint_number) + '.pth'))

    # Merge the experiment yaml with the global configuration structure
    merge_with_yaml(os.path.join('/',
                                 os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                                 yaml_conf))

    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    self.first_iter = True
    logging.info("Setup Model")

    # Load the model and prepare it for evaluation
    self._model.load_state_dict(checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()

    # Set ERFNet for segmentation
    self.model_erf = ERFNet(20)
    self.model_erf = torch.nn.DataParallel(self.model_erf)
    self.model_erf = self.model_erf.cuda()
    print("LOAD ERFNet - drive")

    def load_my_state_dict(model, state_dict):
        # Custom loader for when not all dict elements are present in the model
        own_state = model.state_dict()
        for name, param in state_dict.items():
            if name not in own_state:
                continue
            own_state[name].copy_(param)
        return model

    self.model_erf = load_my_state_dict(self.model_erf,
                                        torch.load(os.path.join('trained_models/erfnet_pretrained.pth')))
    self.model_erf.eval()
    print("ERFNet and weights LOADED successfully")

    self.latest_image = None
    self.latest_image_tensor = None

    # We add more time to the curve commands
    self._expand_command_front = 5
    self._expand_command_back = 3
    self.track = Track.CAMERAS
def __init__(self, checkpoint, town_name, carla_version='0.84'):
    # Set the carla version that is going to be used by the interface
    self._carla_version = carla_version
    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.

    # Create model
    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    self.first_iter = True

    # Load the model and prepare it for evaluation
    self._model.load_state_dict(checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()

    # If we are evaluating the squeeze model (i.e. using the ground-truth segmentation mask),
    # also run the autopilot to get its stop intentions
    if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE or "seg" in g_conf.SENSORS.keys():
        self.control_agent = CommandFollower(town_name)
def __init__(self, checkpoint, town_name, carla_version='0.84'):
    # Set the carla version that is going to be used by the interface
    self._carla_version = carla_version
    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    self.first_iter = True

    # Load the model and prepare it for evaluation
    self._model.load_state_dict(checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()

    self.latest_image = None
    self.latest_image_tensor = None

    if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
        self.control_agent = CommandFollower(town_name)
def __init__(self, checkpoint, town_name, carla_version='0.84'):
    # Set the carla version that is going to be used by the interface
    self._carla_version = carla_version
    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    self.first_iter = True

    # Load the model and prepare it for evaluation
    self._model.load_state_dict(checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()

    # Set ERFNet for segmentation
    self.model_erf = ERFNet(20)
    self.model_erf = torch.nn.DataParallel(self.model_erf)
    self.model_erf.cuda()
    print("LOAD ERFNet - validate")

    def load_my_state_dict(model, state_dict):
        # Custom loader for when not all dict elements are present in the model
        own_state = model.state_dict()
        for name, param in state_dict.items():
            if name not in own_state:
                continue
            own_state[name].copy_(param)
        return model

    self.model_erf = load_my_state_dict(self.model_erf,
                                        torch.load(os.path.join('trained_models/erfnet_pretrained.pth')))
    self.model_erf.eval()
    print("ERFNet and weights LOADED successfully")

    self.latest_image = None
    self.latest_image_tensor = None

    if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
        self.control_agent = CommandFollower(town_name)
def __init__(self, checkpoint, town_name, carla_version='0.84', vae_params=None):
    # Set the carla version that is going to be used by the interface
    self._carla_version = carla_version
    self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    self.first_iter = True

    # Load the model and prepare it for evaluation
    self._model.load_state_dict(checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()

    self._vae_params = vae_params
    if g_conf.VAE_MODEL_CONFIGURATION != {}:
        # Add the VAE model
        self._VAE_model = CoILModel('VAE', g_conf.VAE_MODEL_CONFIGURATION)
        self._VAE_model.cuda()
        VAE_checkpoint = torch.load(
            os.path.join('_logs', vae_params['vae_folder'], vae_params['vae_exp'],
                         'checkpoints', str(vae_params['vae_checkpoint']) + '.pth'))
        print("VAE model ", str(vae_params['vae_checkpoint']), " loaded from ",
              os.path.join('_logs', vae_params['vae_folder'], vae_params['vae_exp'], 'checkpoints'))
        self._VAE_model.load_state_dict(VAE_checkpoint['state_dict'])
        self._VAE_model.eval()

    self.latest_image = None
    self.latest_image_tensor = None

    if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
        self.control_agent = CommandFollower(town_name)
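# Illustrative layout of the `vae_params` dict consumed above, inferred from the keys
# it reads ('vae_folder', 'vae_exp', 'vae_checkpoint'). The folder and experiment
# names are placeholders; the resulting checkpoint path is
# _logs/<vae_folder>/<vae_exp>/checkpoints/<vae_checkpoint>.pth.
vae_params = {
    'vae_folder': 'vae_experiments',   # sub-folder of _logs
    'vae_exp': 'vae_baseline',         # experiment alias inside that folder
    'vae_checkpoint': 100000,          # iteration number of the saved .pth file
}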
def __init__(self, checkpoint, architecture_name):
    # experiment_name='None', driver_conf=None, memory_fraction=0.18,
    # image_cut=[115, 510]
    # use_planner=False, graph_file=None, map_file=None, augment_left_right=False, image_cut=[170, 518]
    Agent.__init__(self)

    # This should likely come from global
    # config_gpu = tf.ConfigProto()
    # config_gpu.gpu_options.visible_device_list = '0'
    # config_gpu.gpu_options.per_process_gpu_memory_fraction = memory_fraction
    # self._sess = tf.Session(config=config_gpu)

    # THIS DOES NOT WORK FOR FUSED PLUS LSTM
    # if self._config.number_frames_sequenced > self._config.number_frames_fused:
    #     self._config_train.batch_size = self._config.number_frames_sequenced
    # else:
    #     self._config_train.batch_size = self._config.number_frames_fused
    # self._train_manager = load_system(self._config_train)
    # self._config.train_segmentation = False

    self.architecture_name = architecture_name

    if architecture_name == 'coil_unit':
        self.model_task, self.model_gen = CoILModel('coil_unit')
        self.model_task, self.model_gen = self.model_task.cuda(), self.model_gen.cuda()
    elif architecture_name == 'unit_task_only':
        self.model_task, self.model_gen = CoILModel('unit_task_only')
        self.model_task, self.model_gen = self.model_task.cuda(), self.model_gen.cuda()
    else:
        self.model = CoILModel(architecture_name)
        self.model.cuda()

    if architecture_name == 'wgangp_lsd':
        # print(ckpt, checkpoint['best_loss_iter_F'])
        self.model.load_state_dict(checkpoint['stateF_dict'])
        self.model.eval()
    elif architecture_name == 'coil_unit':
        self.model_task.load_state_dict(checkpoint['task'])
        self.model_gen.load_state_dict(checkpoint['b'])
        self.model_task.eval()
        self.model_gen.eval()
    elif architecture_name == 'coil_icra':
        self.model.load_state_dict(checkpoint['state_dict'])
        self.model.eval()
    elif architecture_name == 'unit_task_only':
        self.model_task.load_state_dict(checkpoint['task_state_dict'])
        self.model_gen.load_state_dict(checkpoint['enc_state_dict'])
        self.model_task.eval()
        self.model_gen.eval()
class CoILAgent(object):

    def __init__(self, checkpoint, town_name, carla_version='0.84', vae_params=None):
        # Set the carla version that is going to be used by the interface
        self._carla_version = carla_version
        self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True

        # Load the model and prepare it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

        self._vae_params = vae_params
        if g_conf.VAE_MODEL_CONFIGURATION != {}:
            # Add the VAE model
            self._VAE_model = CoILModel('VAE', g_conf.VAE_MODEL_CONFIGURATION)
            self._VAE_model.cuda()
            VAE_checkpoint = torch.load(
                os.path.join('_logs', vae_params['vae_folder'], vae_params['vae_exp'],
                             'checkpoints', str(vae_params['vae_checkpoint']) + '.pth'))
            print("VAE model ", str(vae_params['vae_checkpoint']), " loaded from ",
                  os.path.join('_logs', vae_params['vae_folder'], vae_params['vae_exp'], 'checkpoints'))
            self._VAE_model.load_state_dict(VAE_checkpoint['state_dict'])
            self._VAE_model.eval()

        self.latest_image = None
        self.latest_image_tensor = None

        if g_conf.USE_ORACLE or g_conf.USE_FULL_ORACLE:
            self.control_agent = CommandFollower(town_name)

    def run_step(self, measurements, sensor_data, directions, target):
        """
        Run a step on the benchmark simulation.
        Args:
            measurements: All the float measurements from CARLA (just the speed is used)
            sensor_data: All the sensor data used on this benchmark
            directions: The directions, high level commands
            target: Final objective. Not used when the agent is predicting all outputs.
        Returns:
            Controls for the vehicle on the CARLA simulator.
        """
        # Take the forward speed and normalize it to the range [0, 1]
        norm_speed = measurements.player_measurements.forward_speed / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])

        # Compute the forward pass, processing the sensors obtained from CARLA.
        if g_conf.VAE_MODEL_CONFIGURATION != {}:
            input_data = self._process_sensors(sensor_data)
            _, _, _, z = self._VAE_model(input_data)
            model_outputs = self._model.forward_branch(z, norm_speed, directions_tensor)
        else:
            model_outputs = self._model.forward_branch(self._process_sensors(sensor_data),
                                                       norm_speed, directions_tensor)

        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        if self._carla_version == '0.9':
            import carla
            control = carla.VehicleControl()
        else:
            control = VehicleControl()
        control.steer = float(steer)
        control.throttle = float(throttle)
        control.brake = float(brake)

        # There is the possibility to replace some of the predictions with oracle predictions.
        if g_conf.USE_ORACLE:
            _, control.throttle, control.brake = self._get_oracle_prediction(measurements, target)

        if self.first_iter:
            coil_logger.add_message('Iterating',
                                    {"Checkpoint": self.checkpoint['iteration'],
                                     'Agent': str(steer)},
                                    self.checkpoint['iteration'])
            self.first_iter = False

        return control

    def get_attentions(self, layers=None):
        """
        Returns the activations obtained from the first layers of the latest iteration.
        """
        if layers is None:
            layers = [0, 1, 2]
        if self.latest_image_tensor is None:
            raise ValueError('No step has been run yet. '
                             'No image to compute the activations; try running first.')
        all_layers = self._model.get_perception_layers(self.latest_image_tensor)
        cmap = plt.get_cmap('inferno')
        attentions = []
        for layer in layers:
            y = all_layers[layer]
            att = torch.abs(y).mean(1)[0].data.cpu().numpy()
            att = att / att.max()
            att = cmap(att)
            att = np.delete(att, 3, 2)
            attentions.append(imresize(att, [88, 200]))
        return attentions

    def _process_sensors(self, sensors):
        iteration = 0
        for name, size in g_conf.SENSORS.items():
            if self._carla_version == '0.9':
                sensor = sensors[name][g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            else:
                sensor = sensors[name].data[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], ...]
            sensor = scipy.misc.imresize(sensor, (size[1], size[2]))
            self.latest_image = sensor
            sensor = np.swapaxes(sensor, 0, 1)
            sensor = np.transpose(sensor, (2, 1, 0))
            sensor = torch.from_numpy(sensor / 255.0).type(torch.FloatTensor).cuda()
            if iteration == 0:
                image_input = sensor
            else:
                image_input = torch.cat((image_input, sensor), 0)
            iteration += 1
        image_input = image_input.unsqueeze(0)
        self.latest_image_tensor = image_input
        return image_input

    def _process_model_outputs(self, outputs):
        """
        A bit of heuristics in the control, to eventually make the car faster, for instance.
        Returns:
            steer, throttle, brake
        """
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        if brake < 0.05:
            brake = 0.0
        if throttle > brake:
            brake = 0.0
        return steer, throttle, brake

    def _process_model_outputs_wp(self, outputs):
        """
        A bit of heuristics in the control, to eventually make the car faster, for instance.
        Returns:
            steer, throttle, brake
        """
        wpa1, wpa2, throttle, brake = outputs[3], outputs[4], outputs[1], outputs[2]
        if brake < 0.2:
            brake = 0.0
        if throttle > brake:
            brake = 0.0
        steer = 0.7 * wpa2
        if steer > 0:
            steer = min(steer, 1)
        else:
            steer = max(steer, -1)
        return steer, throttle, brake

    def _get_oracle_prediction(self, measurements, target):
        # For the oracle, the current version of sensor data is not really relevant.
        control, _, _, _, _ = self.control_agent.run_step(measurements, [], [], target)
        return control.steer, control.throttle, control.brake
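# Tiny worked example of the _process_model_outputs heuristic above, using made-up
# network outputs (a plain list instead of the usual tensor). A small brake value is
# zeroed out, and brake is also dropped whenever the throttle dominates.
# Assumes `agent` is an instance of the CoILAgent class defined above.
example_outputs = [0.12, 0.60, 0.03]  # steer, throttle, brake
steer, throttle, brake = agent._process_model_outputs(example_outputs)
# steer == 0.12, throttle == 0.60, brake == 0.0  (0.03 < 0.05, and throttle > brake)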
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12, encoder_params=None): """ The main training function. This functions loads the latest checkpoint for a given, exp_batch (folder) and exp_alias (experiment configuration). With this checkpoint it starts from the beginning or continue some training. Args: gpu: The GPU number exp_batch: the folder with the experiments exp_alias: the alias, experiment name suppress_output: if the output are going to be saved on a file number_of_workers: the number of threads used for data loading Returns: None """ try: # We set the visible cuda devices to select the GPU os.environ["CUDA_VISIBLE_DEVICES"] = gpu g_conf.VARIABLE_WEIGHT = {} # At this point the log file with the correct naming is created. # You merge the yaml file with the global configuration structure. merge_with_yaml( os.path.join('configs', exp_batch, exp_alias + '.yaml'), encoder_params) set_type_of_process('train') # Set the process into loading status. coil_logger.add_message('Loading', {'GPU': os.environ["CUDA_VISIBLE_DEVICES"]}) seed_everything(seed=g_conf.MAGICAL_SEED) # Put the output to a separate file if it is the case if suppress_output: if not os.path.exists('_output_logs'): os.mkdir('_output_logs') sys.stdout = open(os.path.join( '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) sys.stderr = open(os.path.join( '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) if coil_logger.check_finish('train'): coil_logger.add_message('Finished', {}) return # Preload option print(" GOING TO LOAD") if g_conf.PRELOAD_MODEL_ALIAS is not None: print(" LOADING A PRELOAD") checkpoint = torch.load( os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH, g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints', str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth')) else: # Get the latest checkpoint to be loaded # returns none if there are no checkpoints saved for this model checkpoint_file = get_latest_saved_checkpoint() if checkpoint_file is not None: print('loading previous checkpoint ', checkpoint_file) checkpoint = torch.load( os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME, g_conf.EXPERIMENT_NAME, 'checkpoints', str(get_latest_saved_checkpoint()))) iteration = checkpoint['iteration'] best_loss = checkpoint['best_loss'] best_loss_iter = checkpoint['best_loss_iter'] else: iteration = 0 best_loss = 100000000.0 best_loss_iter = 0 # Define the dataset. This structure is has the __get_item__ redefined in a way # that you can access the positions from the root directory as a in a vector. #full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME) # By instantiating the augmenter we get a callable that augment images and transform them # into tensors. 
augmenter = Augmenter(g_conf.AUGMENTATION) # We can save preload dataset depends on the json file name, then no need to load dataset for each time with the same dataset if len(g_conf.EXPERIENCE_FILE) == 1: json_file_name = str( g_conf.EXPERIENCE_FILE[0]).split('/')[-1].split('.')[-2] else: json_file_name = str(g_conf.EXPERIENCE_FILE[0]).split( '/')[-1].split('.')[-2] + '_' + str( g_conf.EXPERIENCE_FILE[1]).split('/')[-1].split('.')[-2] dataset = CoILDataset(transform=augmenter, preload_name=g_conf.PROCESS_NAME + '_' + json_file_name + '_' + g_conf.DATA_USED) #dataset = CoILDataset(transform=augmenter, preload_name=str(g_conf.NUMBER_OF_HOURS)+ 'hours_' + g_conf.TRAIN_DATASET_NAME) print("Loaded Training dataset") data_loader = select_balancing_strategy(dataset, iteration, number_of_workers) if g_conf.MODEL_TYPE in ['separate-affordances']: model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION, g_conf.ENCODER_MODEL_CONFIGURATION) model.cuda() optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE) print(model) # we use the pre-trained encoder model to extract bottleneck Z and train the E-t-E model if g_conf.MODEL_TYPE in ['separate-affordances']: encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION) encoder_model.cuda() encoder_model.eval() # To freeze the pre-trained encoder model if g_conf.FREEZE_ENCODER: for param_ in encoder_model.parameters(): param_.requires_grad = False if encoder_params is not None: encoder_checkpoint = torch.load( os.path.join( '_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints', str(encoder_params['encoder_checkpoint']) + '.pth')) print( "Encoder model ", str(encoder_params['encoder_checkpoint']), "loaded from ", os.path.join('_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints')) encoder_model.load_state_dict(encoder_checkpoint['state_dict']) if g_conf.FREEZE_ENCODER: encoder_model.eval() # To freeze the pre-trained encoder model for param_ in encoder_model.parameters(): param_.requires_grad = False else: optimizer = optim.Adam(list(model.parameters()) + list(encoder_model.parameters()), lr=g_conf.LEARNING_RATE) for name_encoder, param_encoder in encoder_model.named_parameters( ): if param_encoder.requires_grad: print(' Unfrozen layers', name_encoder) else: print(' Frozen layers', name_encoder) if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None: model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) accumulated_time = checkpoint['total_time'] loss_window = coil_logger.recover_loss_window('train', iteration) else: # We accumulate iteration time and keep the average speed accumulated_time = 0 loss_window = [] for name, param in model.named_parameters(): if param.requires_grad: print(' Unfrozen layers', name) else: print(' Frozen layers', name) print("Before the loss") # Loss time series window for data in data_loader: # Basically in this mode of execution, we validate every X Steps, if it goes up 3 times, # add a stop on the _logs folder that is going to be read by this process if g_conf.FINISH_ON_VALIDATION_STALE is not None and \ check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE): break """ #################################### Main optimization loop #################################### """ if iteration % 1000 == 0: adjust_learning_rate_auto(optimizer, loss_window) model.zero_grad() if not g_conf.FREEZE_ENCODER: encoder_model.zero_grad() if 
g_conf.LABELS_SUPERVISED: inputs_data = torch.cat( (data['rgb'], torch.zeros(g_conf.BATCH_SIZE, 1, 88, 200)), dim=1).cuda() else: inputs_data = torch.squeeze(data['rgb'].cuda()) if g_conf.MODEL_TYPE in ['separate-affordances']: #TODO: for this two encoder models training, we haven't put speed as input to train yet if g_conf.ENCODER_MODEL_TYPE in [ 'action_prediction', 'stdim', 'forward', 'one-step-affordances' ]: e, inter = encoder_model.forward_encoder( inputs_data, dataset.extract_inputs(data).cuda(), # We also add measurements and commands torch.squeeze(dataset.extract_commands(data).cuda())) elif g_conf.ENCODER_MODEL_TYPE in ['ETE']: e, inter = encoder_model.forward_encoder( inputs_data, dataset.extract_inputs(data).cuda(), torch.squeeze(dataset.extract_commands(data).cuda())) loss_function_params = { 'classification_gt': dataset.extract_affordances_targets( data, 'classification').cuda(), # harzard stop, red_light.... 'class_weights': g_conf.AFFORDANCES_CLASS_WEIGHT, 'regression_gt': dataset.extract_affordances_targets(data, 'regression').cuda(), 'variable_weights': g_conf.AFFORDANCES_VARIABLE_WEIGHT } loss = model(e, loss_function_params) loss.backward() optimizer.step() else: raise RuntimeError( 'Not implement yet, this branch is only work for g_conf.MODEL_TYPE in [separate-affordances]' ) """ #################################### Saving the model if necessary #################################### """ if is_ready_to_save(iteration): state = { 'iteration': iteration, 'state_dict': model.state_dict(), 'best_loss': best_loss, 'total_time': accumulated_time, 'optimizer': optimizer.state_dict(), 'best_loss_iter': best_loss_iter } torch.save( state, os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME, g_conf.EXPERIMENT_NAME, 'checkpoints', str(iteration) + '.pth')) if not g_conf.FREEZE_ENCODER: encoder_state = { 'iteration': iteration, 'state_dict': encoder_model.state_dict(), 'best_loss': best_loss, 'total_time': accumulated_time, 'optimizer': optimizer.state_dict(), 'best_loss_iter': best_loss_iter } torch.save( encoder_state, os.path.join('_logs', g_conf.EXPERIMENT_BATCH_NAME, g_conf.EXPERIMENT_NAME, 'checkpoints', str(iteration) + '_encoder.pth')) iteration += 1 """ ################################################ Adding tensorboard logs. Making calculations for logging purposes. These logs are monitored by the printer module. ################################################# """ coil_logger.add_scalar('Loss', loss.data, iteration) coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration) if loss.data < best_loss: best_loss = loss.data.tolist() best_loss_iter = iteration if iteration % 100 == 0: print('Train Iteration: {} [{}/{} ({:.0f}%)] \t Loss: {:.6f}'. format(iteration, iteration, g_conf.NUMBER_ITERATIONS, 100. * iteration / g_conf.NUMBER_ITERATIONS, loss.data)) coil_logger.add_message('Finished', {}) except KeyboardInterrupt: coil_logger.add_message('Error', {'Message': 'Killed By User'}) except RuntimeError as e: coil_logger.add_message('Error', {'Message': str(e)}) except: traceback.print_exc() coil_logger.add_message('Error', {'Message': 'Something Happened'})
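# Sketch of how this training entry point is invoked (the batch folder, experiment
# alias and encoder experiment below are placeholders, not real experiments from
# this repository).
execute(gpu='0',
        exp_batch='EXP_BATCH',                # folder under configs/ with the yaml files
        exp_alias='affordances_baseline',     # <exp_alias>.yaml inside that folder
        suppress_output=False,
        number_of_workers=12,
        encoder_params={'encoder_folder': 'ENCODER_BATCH',
                        'encoder_exp': 'encoder_baseline',
                        'encoder_checkpoint': 100000})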
def execute(gpu, exp_batch, exp_alias, dataset_name): # We set the visible cuda devices os.environ["CUDA_VISIBLE_DEVICES"] = '0' # At this point the log file with the correct naming is created. merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml')) set_type_of_process('validation', dataset_name) if not os.path.exists('_output_logs'): os.mkdir('_output_logs') sys.stdout = open(os.path.join( '_output_logs', g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) if monitorer.get_status(exp_batch, exp_alias + '.yaml', g_conf.PROCESS_NAME)[0] == "Finished": # TODO: print some cool summary or not ? return #Define the dataset. This structure is has the __get_item__ redefined in a way #that you can access the HDFILES positions from the root directory as a in a vector. full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name) print(full_dataset) dataset = CoILDataset(full_dataset, transform=transforms.Compose([transforms.ToTensor() ])) # Creates the sampler, this part is responsible for managing the keys. It divides # all keys depending on the measurements and produces a set of keys for each bach. # The data loader is the multi threaded module from pytorch that release a number of # workers to get all the data. # TODO: batch size an number of workers go to some configuration file data_loader = torch.utils.data.DataLoader(dataset, batch_size=120, shuffle=False, num_workers=12, pin_memory=True) # TODO: here there is clearly a posibility to make a cool "conditioning" system. model = CoILModel(g_conf.MODEL_NAME) model.cuda() model.eval() criterion = Loss() latest = get_latest_evaluated_checkpoint() if latest is None: # When nothing was tested, get latest returns none, we fix that. latest = 0 latest = 200000 best_loss = 1000.0 best_error = 1000.0 best_loss_iter = 0 best_error_iter = 0 print(dataset.meta_data[0][0]) for k in dataset.meta_data: k[0] = str(k[0], 'utf-8') print(dataset.meta_data[0][0]) cpts = glob.glob( '/home-local/rohitrishabh/coil_20-06/_logs/eccv/experiment_1/checkpoints/*.pth' ) # while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE): for ckpt in cpts: # if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE): # latest = get_next_checkpoint(g_conf.TEST_SCHEDULE) latest = int(ckpt[-10:-4]) # checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias # , 'checkpoints', str(latest) + '.pth')) checkpoint = torch.load(ckpt) checkpoint_iteration = checkpoint['iteration'] print("Validation loaded ", checkpoint_iteration) accumulated_loss = 0.0 accumulated_error = 0.0 iteration_on_checkpoint = 0 for data in data_loader: input_data, float_data = data control_position = np.where( dataset.meta_data[:, 0] == 'control')[0][0] speed_position = np.where( dataset.meta_data[:, 0] == 'speed_module')[0][0] # print (torch.squeeze(input_data['rgb']).shape) # print (control_position) # print (speed_position) # Obs : Maybe we could also check for other branches ?? output = model.forward_branch( torch.squeeze(input_data['rgb']).cuda(), float_data[:, speed_position, :].cuda(), float_data[:, control_position, :].cuda()) for i in range(input_data['rgb'].shape[0]): coil_logger.write_on_csv( checkpoint_iteration, [output[i][0], output[i][1], output[i][2]]) # TODO: Change this a functional standard using the loss functions. 
loss = torch.mean( (output - dataset.extract_targets(float_data).cuda())**2).data.tolist() mean_error = torch.mean( torch.abs( output - dataset.extract_targets(float_data).cuda())).data.tolist() accumulated_error += mean_error accumulated_loss += loss error = torch.abs(output - dataset.extract_targets(float_data).cuda()) # Log a random position position = random.randint(0, len(float_data) - 1) #print (output[position].data.tolist()) coil_logger.add_message( 'Iterating in Validation', { 'Checkpoint': latest, 'Iteration': (str(iteration_on_checkpoint * 120) + '/' + str(len(dataset))), 'MeanError': mean_error, 'Loss': loss, 'Output': output[position].data.tolist(), 'GroundTruth': dataset.extract_targets(float_data) [position].data.tolist(), 'Error': error[position].data.tolist(), 'Inputs': dataset.extract_inputs(float_data)[position].data.tolist() }, latest) iteration_on_checkpoint += 1 checkpoint_average_loss = accumulated_loss / len(dataset) checkpoint_average_error = accumulated_error / len(dataset) coil_logger.add_scalar('Loss', checkpoint_average_loss, latest) coil_logger.add_scalar('Error', checkpoint_average_error, latest) print('Loss: ', checkpoint_average_loss, "----Error: ", checkpoint_average_error) if checkpoint_average_loss < best_loss: best_loss = checkpoint_average_loss best_loss_iter = latest state = { 'state_dict': model.state_dict(), 'best_loss': best_loss, 'best_loss_iter': best_loss_iter } # TODO : maybe already summarize the best model ??? torch.save( state, os.path.join('_logs', exp_batch, exp_alias, 'best_model_l2' + '.pth')) if checkpoint_average_error < best_error: best_error = checkpoint_average_error best_error_iter = latest state = { 'state_dict': model.state_dict(), 'best_error': best_error, 'best_error_iter': best_error_iter } # TODO : maybe already summarize the best model ??? torch.save( state, os.path.join('_logs', exp_batch, exp_alias, 'best_model_l1' + '.pth')) print('Best Loss: ', best_loss, "Checkpoint", best_loss_iter) print('Best Error: ', best_error, "Checkpoint", best_error_iter) coil_logger.add_message( 'Iterating in Validation', { 'Summary': { 'Error': checkpoint_average_error, 'Loss': checkpoint_average_loss, 'BestError': best_error, 'BestLoss': best_loss, 'BestLossCheckpoint': best_loss_iter, 'BestErrorCheckpoint': best_error_iter }, 'Checkpoint': latest })
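# The two validation metrics accumulated above, written out on a toy batch with
# made-up numbers: `loss` is the mean squared error and `mean_error` the mean
# absolute error between the network output and the ground-truth targets.
import torch

output = torch.tensor([[0.1, 0.5, 0.0]])       # e.g. predicted steer, throttle, brake
targets = torch.tensor([[0.0, 0.6, 0.0]])      # ground truth
mse = torch.mean((output - targets) ** 2)      # ~0.0067
mae = torch.mean(torch.abs(output - targets))  # ~0.0667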
def execute(gpu, exp_batch, exp_alias, dataset_name, suppress_output): latest = None try: # We set the visible cuda devices os.environ["CUDA_VISIBLE_DEVICES"] = gpu # At this point the log file with the correct naming is created. merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml')) # The validation dataset is always fully loaded, so we fix a very high number of hours g_conf.NUMBER_OF_HOURS = 10000 set_type_of_process('validation', dataset_name) if not os.path.exists('_output_logs'): os.mkdir('_output_logs') if suppress_output: sys.stdout = open(os.path.join( '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) sys.stderr = open(os.path.join( '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) # Define the dataset. This structure is has the __get_item__ redefined in a way # that you can access the HDFILES positions from the root directory as a in a vector. full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name) augmenter = Augmenter(None) # Definition of the dataset to be used. Preload name is just the validation data name dataset = CoILDataset(full_dataset, transform=augmenter, preload_name=dataset_name) # Creates the sampler, this part is responsible for managing the keys. It divides # all keys depending on the measurements and produces a set of keys for each bach. # The data loader is the multi threaded module from pytorch that release a number of # workers to get all the data. data_loader = torch.utils.data.DataLoader( dataset, batch_size=g_conf.BATCH_SIZE, shuffle=False, num_workers=g_conf.NUMBER_OF_LOADING_WORKERS, pin_memory=True) model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION) # Set ERFnet for segmentation model_erf = ERFNet(20) model_erf = torch.nn.DataParallel(model_erf) model_erf = model_erf.cuda() print("LOAD ERFNet - validate") def load_my_state_dict( model, state_dict ): #custom function to load model when not all dict elements own_state = model.state_dict() for name, param in state_dict.items(): if name not in own_state: continue own_state[name].copy_(param) return model model_erf = load_my_state_dict( model_erf, torch.load(os.path.join('trained_models/erfnet_pretrained.pth'))) model_erf.eval() print("ERFNet and weights LOADED successfully") # The window used to keep track of the trainings l1_window = [] latest = get_latest_evaluated_checkpoint() if latest is not None: # When latest is noe l1_window = coil_logger.recover_loss_window(dataset_name, None) model.cuda() best_mse = 1000 best_error = 1000 best_mse_iter = 0 best_error_iter = 0 while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE): if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE): latest = get_next_checkpoint(g_conf.TEST_SCHEDULE) checkpoint = torch.load( os.path.join('_logs', exp_batch, exp_alias, 'checkpoints', str(latest) + '.pth')) checkpoint_iteration = checkpoint['iteration'] print("Validation loaded ", checkpoint_iteration) model.load_state_dict(checkpoint['state_dict']) model.eval() accumulated_mse = 0 accumulated_error = 0 iteration_on_checkpoint = 0 for data in data_loader: # Compute the forward pass on a batch from the validation dataset controls = data['directions'] # Seg batch rgbs = data['rgb'] with torch.no_grad(): outputs = model_erf(rgbs) labels = outputs.max(1)[1].byte().cpu().data seg_road = (labels == 0) seg_not_road = (labels != 0) seg = torch.stack((seg_road, seg_not_road), 1).float() output = model.forward_branch( 
torch.squeeze(seg).cuda(), dataset.extract_inputs(data).cuda(), controls) # output = model.foward_branch(torch.squeeze(rgbs).cuda(), # dataset.extract_inputs(data).cuda(),controls) # It could be either waypoints or direct control if 'waypoint1_angle' in g_conf.TARGETS: write_waypoints_output(checkpoint_iteration, output) else: write_regular_output(checkpoint_iteration, output) mse = torch.mean( (output - dataset.extract_targets(data).cuda() )**2).data.tolist() mean_error = torch.mean( torch.abs(output - dataset.extract_targets(data).cuda()) ).data.tolist() accumulated_error += mean_error accumulated_mse += mse error = torch.abs(output - dataset.extract_targets(data).cuda()) # Log a random position position = random.randint(0, len(output.data.tolist()) - 1) coil_logger.add_message( 'Iterating', { 'Checkpoint': latest, 'Iteration': (str(iteration_on_checkpoint * 120) + '/' + str(len(dataset))), 'MeanError': mean_error, 'MSE': mse, 'Output': output[position].data.tolist(), 'GroundTruth': dataset.extract_targets( data)[position].data.tolist(), 'Error': error[position].data.tolist(), 'Inputs': dataset.extract_inputs(data) [position].data.tolist() }, latest) iteration_on_checkpoint += 1 print("Iteration %d on Checkpoint %d : Error %f" % (iteration_on_checkpoint, checkpoint_iteration, mean_error)) """ ######## Finish a round of validation, write results, wait for the next ######## """ checkpoint_average_mse = accumulated_mse / (len(data_loader)) checkpoint_average_error = accumulated_error / ( len(data_loader)) coil_logger.add_scalar('Loss', checkpoint_average_mse, latest, True) coil_logger.add_scalar('Error', checkpoint_average_error, latest, True) if checkpoint_average_mse < best_mse: best_mse = checkpoint_average_mse best_mse_iter = latest if checkpoint_average_error < best_error: best_error = checkpoint_average_error best_error_iter = latest coil_logger.add_message( 'Iterating', { 'Summary': { 'Error': checkpoint_average_error, 'Loss': checkpoint_average_mse, 'BestError': best_error, 'BestMSE': best_mse, 'BestMSECheckpoint': best_mse_iter, 'BestErrorCheckpoint': best_error_iter }, 'Checkpoint': latest }, latest) l1_window.append(checkpoint_average_error) coil_logger.write_on_error_csv(dataset_name, checkpoint_average_error) # If we are using the finish when validation stops, we check the current if g_conf.FINISH_ON_VALIDATION_STALE is not None: if dlib.count_steps_without_decrease(l1_window) > 3 and \ dlib.count_steps_without_decrease_robust(l1_window) > 3: coil_logger.write_stop(dataset_name, latest) break else: latest = get_latest_evaluated_checkpoint() time.sleep(1) coil_logger.add_message('Loading', {'Message': 'Waiting Checkpoint'}) print("Waiting for the next Validation") coil_logger.add_message('Finished', {}) except KeyboardInterrupt: coil_logger.add_message('Error', {'Message': 'Killed By User'}) # We erase the output that was unfinished due to some process stop. if latest is not None: coil_logger.erase_csv(latest) except RuntimeError as e: if latest is not None: coil_logger.erase_csv(latest) coil_logger.add_message('Error', {'Message': str(e)}) except: traceback.print_exc() coil_logger.add_message('Error', {'Message': 'Something Happened'}) # We erase the output that was unfinished due to some process stop. if latest is not None: coil_logger.erase_csv(latest)
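# Sketch of the validation-stale test used above, on a toy error window (made-up
# values). dlib.count_steps_without_decrease estimates for how many recent steps the
# series has shown no decrease; validation is declared stale once both the plain and
# the outlier-robust counts exceed 3.
import dlib

l1_window = [0.31, 0.30, 0.30, 0.31, 0.32, 0.32, 0.33]
stale = (dlib.count_steps_without_decrease(l1_window) > 3 and
         dlib.count_steps_without_decrease_robust(l1_window) > 3)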
def execute(gpu, exp_batch, exp_alias, json_file_path, suppress_output, encoder_params=None, plot_attentions=False): try: # We set the visible cuda devices os.environ["CUDA_VISIBLE_DEVICES"] = gpu if json_file_path is not None: json_file_name = json_file_path.split('/')[-1].split('.')[-2] else: raise RuntimeError( "You need to define the validation json file path") # At this point the log file with the correct naming is created. merge_with_yaml( os.path.join('configs', exp_batch, exp_alias + '.yaml'), encoder_params) if plot_attentions: set_type_of_process('validation', json_file_name + '_plotAttention') else: set_type_of_process('validation', json_file_name) if not os.path.exists('_output_logs'): os.mkdir('_output_logs') if suppress_output: sys.stdout = open(os.path.join( '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) sys.stderr = open(os.path.join( '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) # We create file for saving validation results summary_file = os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME, g_conf.PROCESS_NAME + '_csv', 'valid_summary_1camera.csv') g_conf.immutable(False) g_conf.DATA_USED = 'central' g_conf.immutable(True) if not os.path.exists(summary_file): csv_outfile = open(summary_file, 'w') csv_outfile.write( "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n" % ('step', 'accumulated_pedestrian_TP', 'accumulated_pedestrian_FP', 'accumulated_pedestrian_FN', 'accumulated_pedestrian_TN', 'accumulated_vehicle_stop_TP', 'accumulated_vehicle_stop_FP', 'accumulated_vehicle_stop_FN', 'accumulated_vehicle_stop_TN', 'accumulated_red_tl_TP', 'accumulated_red_tl_FP', 'accumulated_red_tl_FN', 'accumulated_red_tl_TN', 'MAE_relative_angle')) csv_outfile.close() latest = get_latest_evaluated_checkpoint_2(summary_file) # Define the dataset. This structure is has the __get_item__ redefined in a way # that you can access the HDFILES positions from the root directory as a in a vector. #full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name) augmenter = Augmenter(None) # Definition of the dataset to be used. Preload name is just the validation data name dataset = CoILDataset(transform=augmenter, preload_name=g_conf.PROCESS_NAME + '_' + g_conf.DATA_USED, process_type='validation', vd_json_file_path=json_file_path) print("Loaded Validation dataset") # Creates the sampler, this part is responsible for managing the keys. It divides # all keys depending on the measurements and produces a set of keys for each bach. # The data loader is the multi threaded module from pytorch that release a number of # workers to get all the data. 
data_loader = torch.utils.data.DataLoader( dataset, batch_size=g_conf.BATCH_SIZE, shuffle=False, num_workers=g_conf.NUMBER_OF_LOADING_WORKERS, pin_memory=True) if g_conf.MODEL_TYPE in ['one-step-affordances']: # one step training, no need to retrain FC layers, we just get the output of encoder model as prediciton model = EncoderModel(g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION) model.cuda() #print(model) elif g_conf.MODEL_TYPE in ['separate-affordances']: model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION, g_conf.ENCODER_MODEL_CONFIGURATION) model.cuda() #print(model) encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION) encoder_model.cuda() encoder_model.eval() # Here we load the pre-trained encoder (not fine-tunned) if g_conf.FREEZE_ENCODER: if encoder_params is not None: encoder_checkpoint = torch.load( os.path.join( '_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints', str(encoder_params['encoder_checkpoint']) + '.pth')) print( "Encoder model ", str(encoder_params['encoder_checkpoint']), "loaded from ", os.path.join('_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints')) encoder_model.load_state_dict( encoder_checkpoint['state_dict']) encoder_model.eval() for param_ in encoder_model.parameters(): param_.requires_grad = False while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE): latest = get_next_checkpoint_2(g_conf.TEST_SCHEDULE, summary_file) if os.path.exists( os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME, 'checkpoints', str(latest) + '.pth')): checkpoint = torch.load( os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME, 'checkpoints', str(latest) + '.pth')) checkpoint_iteration = checkpoint['iteration'] model.load_state_dict(checkpoint['state_dict']) print("Validation checkpoint ", checkpoint_iteration) model.eval() for param_ in model.parameters(): param_.requires_grad = False # Here we load the fine-tunned encoder if not g_conf.FREEZE_ENCODER and g_conf.MODEL_TYPE not in [ 'one-step-affordances' ]: encoder_checkpoint = torch.load( os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME, 'checkpoints', str(latest) + '_encoder.pth')) print( "FINE TUNNED encoder model ", str(latest) + '_encoder.pth', "loaded from ", os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME, 'checkpoints')) encoder_model.load_state_dict( encoder_checkpoint['state_dict']) encoder_model.eval() for param_ in encoder_model.parameters(): param_.requires_grad = False accumulated_mae_ra = 0 accumulated_pedestrian_TP = 0 accumulated_pedestrian_TN = 0 accumulated_pedestrian_FN = 0 accumulated_pedestrian_FP = 0 accumulated_red_tl_TP = 0 accumulated_red_tl_TN = 0 accumulated_red_tl_FP = 0 accumulated_red_tl_FN = 0 accumulated_vehicle_stop_TP = 0 accumulated_vehicle_stop_TN = 0 accumulated_vehicle_stop_FP = 0 accumulated_vehicle_stop_FN = 0 iteration_on_checkpoint = 0 for data in data_loader: if g_conf.MODEL_TYPE in ['one-step-affordances']: c_output, r_output, layers = model.forward_outputs( torch.squeeze(data['rgb'].cuda()), dataset.extract_inputs(data).cuda(), dataset.extract_commands(data).cuda()) elif g_conf.MODEL_TYPE in ['separate-affordances']: if g_conf.ENCODER_MODEL_TYPE in [ 'action_prediction', 'stdim', 'ETEDIM', 'FIMBC', 'one-step-affordances' ]: e, layers = encoder_model.forward_encoder( torch.squeeze(data['rgb'].cuda()), dataset.extract_inputs(data).cuda(), torch.squeeze( dataset.extract_commands(data).cuda())) c_output, r_output = 
model.forward_test(e) elif g_conf.ENCODER_MODEL_TYPE in [ 'ETE', 'ETE_inverse_model', 'forward', 'ETE_stdim' ]: e, layers = encoder_model.forward_encoder( torch.squeeze(data['rgb'].cuda()), dataset.extract_inputs(data).cuda(), torch.squeeze( dataset.extract_commands(data).cuda())) c_output, r_output = model.forward_test(e) if plot_attentions: attentions_path = os.path.join( '_logs', exp_batch, g_conf.EXPERIMENT_NAME, g_conf.PROCESS_NAME + '_attentions_' + str(latest)) write_attentions(torch.squeeze(data['rgb']), layers, iteration_on_checkpoint, attentions_path) # Accurancy = (TP+TN)/(TP+TN+FP+FN) # F1-score = 2*TP / (2*TP + FN + FP) classification_gt = dataset.extract_affordances_targets( data, 'classification') regression_gt = dataset.extract_affordances_targets( data, 'regression') TP = 0 FN = 0 FP = 0 TN = 0 for i in range(classification_gt.shape[0]): if classification_gt[i, 0] == ( c_output[0][i, 0] < c_output[0][i, 1]).type( torch.FloatTensor) == 1: TP += 1 elif classification_gt[ i, 0] == 1 and classification_gt[i, 0] != ( c_output[0][i, 0] < c_output[0][i, 1]).type(torch.FloatTensor): FN += 1 elif classification_gt[ i, 0] == 0 and classification_gt[i, 0] != ( c_output[0][i, 0] < c_output[0][i, 1]).type(torch.FloatTensor): FP += 1 if classification_gt[i, 0] == ( c_output[0][i, 0] < c_output[0][i, 1]).type( torch.FloatTensor) == 0: TN += 1 accumulated_pedestrian_TP += TP accumulated_pedestrian_TN += TN accumulated_pedestrian_FP += FP accumulated_pedestrian_FN += FN TP = 0 FN = 0 FP = 0 TN = 0 for i in range(classification_gt.shape[0]): if classification_gt[i, 1] == ( c_output[1][i, 0] < c_output[1][i, 1]).type( torch.FloatTensor) == 1: TP += 1 elif classification_gt[ i, 1] == 1 and classification_gt[i, 1] != ( c_output[1][i, 0] < c_output[1][i, 1]).type(torch.FloatTensor): FN += 1 elif classification_gt[ i, 1] == 0 and classification_gt[i, 1] != ( c_output[1][i, 0] < c_output[1][i, 1]).type(torch.FloatTensor): FP += 1 if classification_gt[i, 1] == ( c_output[1][i, 0] < c_output[1][i, 1]).type( torch.FloatTensor) == 0: TN += 1 accumulated_red_tl_TP += TP accumulated_red_tl_TN += TN accumulated_red_tl_FP += FP accumulated_red_tl_FN += FN TP = 0 FN = 0 FP = 0 TN = 0 for i in range(classification_gt.shape[0]): if classification_gt[i, 2] == ( c_output[2][i, 0] < c_output[2][i, 1]).type( torch.FloatTensor) == 1: TP += 1 elif classification_gt[i, 2] == 1 and classification_gt[i, 2] !=\ (c_output[2][i, 0] < c_output[2][i, 1]).type(torch.FloatTensor): FN += 1 elif classification_gt[i, 2] == 0 and classification_gt[i, 2] !=\ (c_output[2][i, 0] < c_output[2][i, 1]).type(torch.FloatTensor): FP += 1 if classification_gt[i, 2] == ( c_output[2][i, 0] < c_output[2][i, 1]).type( torch.FloatTensor) == 0: TN += 1 accumulated_vehicle_stop_TP += TP accumulated_vehicle_stop_TN += TN accumulated_vehicle_stop_FP += FP accumulated_vehicle_stop_FN += FN # if the data was normalized during training, we need to transform it to its unit write_regular_output(checkpoint_iteration, torch.squeeze(r_output[0]), regression_gt[:, 0]) mae_ra = torch.abs(regression_gt[:, 0] - torch.squeeze(r_output[0]).type(torch.FloatTensor)).\ numpy() accumulated_mae_ra += np.sum(mae_ra) if iteration_on_checkpoint % 100 == 0: print( "Validation iteration: %d [%d/%d)] on Checkpoint %d " % (iteration_on_checkpoint, iteration_on_checkpoint, len(data_loader), checkpoint_iteration)) iteration_on_checkpoint += 1 # Here also need a better analysis. 
TODO divide into curve and other things MAE_relative_angle = accumulated_mae_ra / (len(dataset)) csv_outfile = open(summary_file, 'a') csv_outfile.write( "%s, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f" % (checkpoint_iteration, accumulated_pedestrian_TP, accumulated_pedestrian_FP, accumulated_pedestrian_FN, accumulated_pedestrian_TN, accumulated_vehicle_stop_TP, accumulated_vehicle_stop_FP, accumulated_vehicle_stop_FN, accumulated_vehicle_stop_TN, accumulated_red_tl_TP, accumulated_red_tl_FP, accumulated_red_tl_FN, accumulated_red_tl_TN, MAE_relative_angle)) csv_outfile.write("\n") csv_outfile.close() else: print('The checkpoint you want to validate is not yet ready ', str(latest)) coil_logger.add_message('Finished', {}) print('VALIDATION FINISHED !!') print(' Validation results saved in ==> ', summary_file) except KeyboardInterrupt: coil_logger.add_message('Error', {'Message': 'Killed By User'}) # We erase the output that was unfinished due to some process stop. if latest is not None: coil_logger.erase_csv(latest) except RuntimeError as e: if latest is not None: coil_logger.erase_csv(latest) coil_logger.add_message('Error', {'Message': str(e)}) except: traceback.print_exc() coil_logger.add_message('Error', {'Message': 'Something Happened'}) # We erase the output that was unfinished due to some process stop. if latest is not None: coil_logger.erase_csv(latest)
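# Helper sketch (not part of the original script, which only writes the raw counts to
# the summary CSV): turning the accumulated confusion counts into the accuracy and
# F1 score quoted in the comments above.
def classification_summary(tp, fp, fn, tn):
    accuracy = (tp + tn) / max(tp + tn + fp + fn, 1)  # (TP+TN)/(TP+TN+FP+FN)
    f1 = 2.0 * tp / max(2 * tp + fn + fp, 1)          # 2*TP/(2*TP+FN+FP)
    return accuracy, f1

acc, f1 = classification_summary(tp=420, fp=35, fn=28, tn=517)  # made-up counts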
class CoILAgent(Agent):

    def __init__(self, checkpoint, architecture_name):
        # experiment_name='None', driver_conf=None, memory_fraction=0.18,
        # image_cut=[115, 510]
        # use_planner=False, graph_file=None, map_file=None, augment_left_right=False, image_cut=[170, 518]
        Agent.__init__(self)

        # This should likely come from global
        # config_gpu = tf.ConfigProto()
        # config_gpu.gpu_options.visible_device_list = '0'
        # config_gpu.gpu_options.per_process_gpu_memory_fraction = memory_fraction
        # self._sess = tf.Session(config=config_gpu)

        # THIS DOES NOT WORK FOR FUSED PLUS LSTM
        # if self._config.number_frames_sequenced > self._config.number_frames_fused:
        #     self._config_train.batch_size = self._config.number_frames_sequenced
        # else:
        #     self._config_train.batch_size = self._config.number_frames_fused
        # self._train_manager = load_system(self._config_train)
        # self._config.train_segmentation = False

        self.architecture_name = architecture_name

        if architecture_name == 'coil_unit':
            self.model_task, self.model_gen = CoILModel('coil_unit')
            self.model_task, self.model_gen = self.model_task.cuda(), self.model_gen.cuda()
        elif architecture_name == 'unit_task_only':
            self.model_task, self.model_gen = CoILModel('unit_task_only')
            self.model_task, self.model_gen = self.model_task.cuda(), self.model_gen.cuda()
        else:
            self.model = CoILModel(architecture_name)
            self.model.cuda()

        if architecture_name == 'wgangp_lsd':
            # print(ckpt, checkpoint['best_loss_iter_F'])
            self.model.load_state_dict(checkpoint['stateF_dict'])
            self.model.eval()
        elif architecture_name == 'coil_unit':
            self.model_task.load_state_dict(checkpoint['task'])
            self.model_gen.load_state_dict(checkpoint['b'])
            self.model_task.eval()
            self.model_gen.eval()
        elif architecture_name == 'coil_icra':
            self.model.load_state_dict(checkpoint['state_dict'])
            self.model.eval()
        elif architecture_name == 'unit_task_only':
            self.model_task.load_state_dict(checkpoint['task_state_dict'])
            self.model_gen.load_state_dict(checkpoint['enc_state_dict'])
            self.model_task.eval()
            self.model_gen.eval()

        # self.model.load_network(checkpoint)
        # self._sess.run(tf.global_variables_initializer())
        # self._control_function = getattr(machine_output_functions,
        #                                  self._train_manager._config.control_mode)
        # More elegant way to merge with autopilot
        # self._agent = Autopilot(ConfigAutopilot(driver_conf.city_name))
        # self._image_cut = driver_conf.image_cut
        # self._auto_pilot = driver_conf.use_planner
        # self._recording = False
        # self._start_time = 0

    def run_step(self, measurements, sensor_data, directions, target):
        # control_agent = self._agent.run_step(measurements, None, target)
        print(" Running STEP ")
        speed = torch.cuda.FloatTensor(
            [measurements.player_measurements.forward_speed]).unsqueeze(0)
        print("Speed is", speed)
        print("Speed shape ", speed.shape)
        directions_tensor = torch.cuda.LongTensor([directions])
        # model_outputs = self.model.forward_branch(self._process_sensors(sensor_data), speed,
        #                                           directions_tensor)

        if self.architecture_name == 'wgangp_lsd':
            embed, model_outputs = self.model(self._process_sensors(sensor_data), speed)
        elif self.architecture_name == 'coil_unit':
            embed, n_b = self.model_gen.encode(self._process_sensors(sensor_data))
            model_outputs = self.model_task(embed, speed)
        elif self.architecture_name == 'unit_task_only':
            embed, n_b = self.model_gen.encode(self._process_sensors(sensor_data))
            model_outputs = self.model_task(embed, speed)
        elif self.architecture_name == 'coil_icra':
            model_outputs = self.model.forward_branch(self._process_sensors(sensor_data),
                                                      speed, directions_tensor)

        print(model_outputs)

        if self.architecture_name == 'coil_icra':
            steer, throttle, brake = self._process_model_outputs(
                model_outputs[0], measurements.player_measurements.forward_speed)
        else:
            steer, throttle, brake = self._process_model_outputs(
                model_outputs[0][0], measurements.player_measurements.forward_speed)

        control = carla_protocol.Control()
        control.steer = steer
        control.throttle = throttle
        control.brake = brake
        # if self._auto_pilot:
        #     control.steer = control_agent.steer
        # TODO: adapt the client side agent for the new version. (PROBLEM)
        # control.throttle = control_agent.throttle
        # control.brake = control_agent.brake
        # TODO: maybe change to a more meaningful message?
        return control

    def _process_sensors(self, sensors):
        iteration = 0
        for name, size in g_conf.SENSORS.items():
            sensor = sensors[name].data[140:260, ...]  # 300*800*3
            image_input = transform.resize(sensor, (128, 128))
            # transforms.Normalize([0.5315, 0.5521, 0.5205], [0.1960, 0.1810, 0.2217])
            image_input = np.transpose(image_input, (2, 0, 1))
            image_input = torch.from_numpy(image_input).type(torch.FloatTensor).cuda()
            image_input = image_input  # normalization
            print("torch size", image_input.size())
            img_np = np.uint8(np.transpose(image_input.cpu().numpy() * 255, (1, 2, 0)))
            # plt.figure(1)
            # plt.subplot(1, 2, 1)
            # plt.imshow(sensor)
            # plt.subplot(1, 2, 2)
            # plt.imshow(img_np)
            # plt.show()
            iteration += 1
        # print(image_input.shape)
        image_input = image_input.unsqueeze(0)
        print(image_input.shape)
        return image_input

    def _process_model_outputs(self, outputs, speed):
        """
        A bit of heuristics in the control, to eventually make the car faster, for instance.
        Returns:
            steer, throttle, brake
        """
        print("OUTPUTS", outputs)
        steer, throttle, brake = outputs[0], outputs[1], outputs[2]
        # if steer > 0.5:
        #     throttle *= (1 - steer + 0.3)
        #     steer += 0.3
        #     if steer > 1:
        #         steer = 1
        # if steer < -0.5:
        #     throttle *= (1 + steer + 0.3)
        #     steer -= 0.3
        #     if steer < -1:
        #         steer = -1
        # if brake < 0.2:
        #     brake = 0.0
        # if throttle > brake: brake = 0.0
        # else:
        #     throttle = throttle * 2
        # if speed > 35.0 and brake == 0.0:
        #     throttle = 0.0
        return steer, throttle, brake
def execute(gpu, exp_batch, exp_alias, state_dict, suppress_output=True, number_of_workers=12): """ The main training function. This functions loads the latest checkpoint for a given, exp_batch (folder) and exp_alias (experiment configuration). With this checkpoint it starts from the beginning or continue some training. Args: gpu: The GPU number exp_batch: the folder with the experiments exp_alias: the alias, experiment name suppress_output: if the output are going to be saved on a file number_of_workers: the number of threads used for data loading Returns: None """ try: # We set the visible cuda devices to select the GPU os.environ["CUDA_VISIBLE_DEVICES"] = gpu g_conf.VARIABLE_WEIGHT = {} # At this point the log file with the correct naming is created. # You merge the yaml file with the global configuration structure. merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml')) set_type_of_process('train') # Set the process into loading status. coil_logger.add_message('Loading', {'GPU': gpu}) # Put the output to a separate file if it is the case if suppress_output: if not os.path.exists('_output_logs'): os.mkdir('_output_logs') sys.stdout = open(os.path.join( '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) sys.stderr = open(os.path.join( '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) if coil_logger.check_finish('train'): coil_logger.add_message('Finished', {}) return # Preload option if g_conf.PRELOAD_MODEL_ALIAS is not None: checkpoint = torch.load( os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH, g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints', str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth')) # Get the latest checkpoint to be loaded # returns none if there are no checkpoints saved for this model checkpoint_file = get_latest_saved_checkpoint() if checkpoint_file is not None: checkpoint = torch.load( os.path.join('_logs', exp_batch, exp_alias, 'checkpoints', str(get_latest_saved_checkpoint()))) iteration = checkpoint['iteration'] best_loss = checkpoint['best_loss'] best_loss_iter = checkpoint['best_loss_iter'] else: iteration = 0 best_loss = 10000.0 best_loss_iter = 0 # Define the dataset. This structure is has the __get_item__ redefined in a way # that you can access the positions from the root directory as a in a vector. full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME) # By instantiating the augmenter we get a callable that augment images and transform them # into tensors. augmenter = Augmenter(g_conf.AUGMENTATION) # Instantiate the class used to read a dataset. 
The coil dataset generator # can be found dataset = CoILDataset(full_dataset, transform=augmenter, preload_name=str(g_conf.NUMBER_OF_HOURS) + 'hours_' + g_conf.TRAIN_DATASET_NAME) print("Loaded dataset") data_loader = select_balancing_strategy(dataset, iteration, number_of_workers) model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION) model.cuda() if state_dict != '': seg_model = ERFNet_Fast(2) seg_model = load_my_state_dict(seg_model, torch.load(state_dict)) seg_model.cuda() optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE) if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None: model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) accumulated_time = checkpoint['total_time'] loss_window = coil_logger.recover_loss_window('train', iteration) else: # We accumulate iteration time and keep the average speed accumulated_time = 0 loss_window = [] print("Before the loss") criterion = Loss(g_conf.LOSS_FUNCTION) color_transforms = Colorizes(2) board = Dashboard(8097) # Loss time series window for data in data_loader: # Basically in this mode of execution, we validate every X Steps, if it goes up 3 times, # add a stop on the _logs folder that is going to be read by this process if g_conf.FINISH_ON_VALIDATION_STALE is not None and \ check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE): break """ #################################### Main optimization loop #################################### """ iteration += 1 if iteration % 1000 == 0: adjust_learning_rate_auto(optimizer, loss_window) # get the control commands from float_data, size = [120,1] capture_time = time.time() controls = data['directions'] # The output(branches) is a list of 5 branches results, each branch is with size [120,3] model.zero_grad() if state_dict != '': with torch.no_grad(): repre = seg_model(torch.squeeze(data['rgb'].cuda()), only_encode=False) inputs = repre imgs = color_transforms(inputs) inputs = inputs.float().cuda() else: inputs = torch.squeeze(data['rgb'].cuda()) # vis board.image( torch.squeeze(data['rgb'])[0].cpu().data, '(train) input iter: ' + str(iteration)) board.image(imgs[0].cpu().data, '(train) output iter: ' + str(iteration)) branches = model(inputs, dataset.extract_inputs(data).cuda()) loss_function_params = { 'branches': branches, 'targets': dataset.extract_targets(data).cuda(), 'controls': controls.cuda(), 'inputs': dataset.extract_inputs(data).cuda(), 'branch_weights': g_conf.BRANCH_LOSS_WEIGHT, 'variable_weights': g_conf.VARIABLE_WEIGHT } loss, _ = criterion(loss_function_params) loss.backward() optimizer.step() """ #################################### Saving the model if necessary #################################### """ if is_ready_to_save(iteration): state = { 'iteration': iteration, 'state_dict': model.state_dict(), 'best_loss': best_loss, 'total_time': accumulated_time, 'optimizer': optimizer.state_dict(), 'best_loss_iter': best_loss_iter } torch.save( state, os.path.join('_logs', exp_batch, exp_alias, 'checkpoints', str(iteration) + '.pth')) """ ################################################ Adding tensorboard logs. Making calculations for logging purposes. These logs are monitored by the printer module. 
################################################# """ coil_logger.add_scalar('Loss', loss.data, iteration) coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration) if loss.data < best_loss: best_loss = loss.data.tolist() best_loss_iter = iteration # Log a random position position = random.randint(0, len(data) - 1) output = model.extract_branch(torch.stack(branches[0:4]), controls) error = torch.abs(output - dataset.extract_targets(data).cuda()) accumulated_time += time.time() - capture_time coil_logger.add_message( 'Iterating', { 'Iteration': iteration, 'Loss': loss.data.tolist(), 'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time, 'BestLoss': best_loss, 'BestLossIteration': best_loss_iter, 'Output': output[position].data.tolist(), 'GroundTruth': dataset.extract_targets(data)[position].data.tolist(), 'Error': error[position].data.tolist(), 'Inputs': dataset.extract_inputs(data)[position].data.tolist() }, iteration) loss_window.append(loss.data.tolist()) coil_logger.write_on_error_csv('train', loss.data) print("Iteration: %d Loss: %f" % (iteration, loss.data)) coil_logger.add_message('Finished', {}) except KeyboardInterrupt: coil_logger.add_message('Error', {'Message': 'Killed By User'}) except RuntimeError as e: coil_logger.add_message('Error', {'Message': str(e)}) except: traceback.print_exc() coil_logger.add_message('Error', {'Message': 'Something Happened'})
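# --- Illustration (not part of the repo): what the branched criterion above is assumed to compute.
# `Loss(g_conf.LOSS_FUNCTION)` lives elsewhere in the codebase; this is only a minimal sketch of a
# branch-masked, per-variable weighted L1 loss, with purely illustrative names and weights.
import torch

def masked_branch_l1(branches, targets, controls, branch_weights, variable_weights, offset=2):
    """Each sample only contributes to the branch selected by its control command."""
    total = torch.zeros(())
    var_w = torch.tensor(variable_weights)                               # e.g. [w_steer, w_throttle, w_brake]
    for i, branch in enumerate(branches):                                # branch: [B, 3]
        mask = (controls.squeeze() == (i + offset)).float().unsqueeze(1) # [B, 1]
        per_var = torch.abs(branch - targets) * var_w                    # weighted absolute error
        total = total + branch_weights[i] * (per_var * mask).mean()
    return total

# toy batch of 2 samples with commands FOLLOW (2) and RIGHT (4)
branches = [torch.zeros(2, 3) for _ in range(4)]
loss = masked_branch_l1(branches, torch.ones(2, 3), torch.tensor([[2.0], [4.0]]),
                        branch_weights=[0.25] * 4, variable_weights=[0.5, 0.45, 0.05])
print(loss)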
def execute(gpu, exp_batch, exp_alias, validation_dataset, suppress_output): latest = None try: # We set the visible cuda devices os.environ["CUDA_VISIBLE_DEVICES"] = gpu # At this point the log file with the correct naming is created. merge_with_yaml(os.path.join('configs', exp_batch, f'{exp_alias}.yaml')) # The validation dataset is always fully loaded, so we fix a very high number of hours g_conf.NUMBER_OF_HOURS = 10000 set_type_of_process(process_type='validation', param=validation_dataset) # Save the output to a file if so desired if suppress_output: save_output(exp_alias) # Define the dataset. This structure has the __get_item__ redefined in a way # that you can access the HDFILES positions from the root directory as a in a vector. full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], validation_dataset) augmenter = Augmenter(None) # Definition of the dataset to be used. Preload name is just the validation data name dataset = CoILDataset(full_dataset, transform=augmenter, preload_name=validation_dataset, process_type='validation') # Creates the sampler, this part is responsible for managing the keys. It divides # all keys depending on the measurements and produces a set of keys for each bach. # The data loader is the multi threaded module from pytorch that release a number of # workers to get all the data. data_loader = torch.utils.data.DataLoader( dataset, batch_size=g_conf.BATCH_SIZE, shuffle=False, num_workers=g_conf.NUMBER_OF_LOADING_WORKERS, pin_memory=True) model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION, g_conf.SENSORS).cuda() # The window used to keep track of the trainings l1_window = [] latest = get_latest_evaluated_checkpoint() if latest is not None: # When latest is noe l1_window = coil_logger.recover_loss_window( validation_dataset, None) # Keep track of the best loss and the iteration where it happens best_loss = 1000 best_loss_iter = 0 print(20 * '#') print('Starting validation!') print(20 * '#') # Check if the maximum checkpoint for validating has been reached while not maximum_checkpoint_reached(latest): # Wait until the next checkpoint is ready (assuming this is run whilst training the model) if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE): # Get next checkpoint for validation according to the test schedule and load it latest = get_next_checkpoint(g_conf.TEST_SCHEDULE) checkpoint = torch.load( os.path.join('_logs', exp_batch, exp_alias, 'checkpoints', f'{latest}.pth')) checkpoint_iteration = checkpoint['iteration'] model.load_state_dict(checkpoint['state_dict']) model.eval() # Turn off dropout and batchnorm (if any) print(f"Validation loaded, checkpoint {checkpoint_iteration}") # Main metric will be the used loss for training the network criterion = Loss(g_conf.LOSS_FUNCTION) checkpoint_average_loss = 0 # Counter iteration_on_checkpoint = 0 with torch.no_grad(): # save some computation/memory for data in data_loader: # Compute the forward pass on a batch from the validation dataset controls = data['directions'].cuda() img = torch.squeeze(data['rgb']).cuda() speed = dataset.extract_inputs( data).cuda() # this might not always be speed # For auxiliary metrics output = model.forward_branch(img, speed, controls) # For the loss function branches = model(img, speed) loss_function_params = { 'branches': branches, 'targets': dataset.extract_targets(data).cuda(), 'controls': controls, 'inputs': speed, 'branch_weights': g_conf.BRANCH_LOSS_WEIGHT, 'variable_weights': g_conf.VARIABLE_WEIGHT } # It could be either waypoints or direct control if 
'waypoint1_angle' in g_conf.TARGETS: write_waypoints_output(checkpoint_iteration, output) else: write_regular_output(checkpoint_iteration, output) loss, _ = criterion(loss_function_params) loss = loss.data.tolist() # Log a random position position = random.randint( 0, len(output.data.tolist()) - 1) coil_logger.add_message( 'Iterating', { 'Checkpoint': latest, 'Iteration': f'{iteration_on_checkpoint * g_conf.BATCH_SIZE}/{len(dataset)}', f'Validation Loss ({g_conf.LOSS_FUNCTION})': loss, 'Output': output[position].data.tolist(), 'GroundTruth': dataset.extract_targets( data)[position].data.tolist(), 'Inputs': dataset.extract_inputs(data) [position].data.tolist() }, latest) # We get the average with a growing list of values # Thanks to John D. Cook: http://www.johndcook.com/blog/standard_deviation/ iteration_on_checkpoint += 1 checkpoint_average_loss += ( loss - checkpoint_average_loss) / iteration_on_checkpoint print( f"\rProgress: {100 * iteration_on_checkpoint * g_conf.BATCH_SIZE / len(dataset):3.4f}% - " f"Average Loss ({g_conf.LOSS_FUNCTION}): {checkpoint_average_loss:.16f}", end='') """ ######## Finish a round of validation, write results, wait for the next ######## """ coil_logger.add_scalar( f'Validation Loss ({g_conf.LOSS_FUNCTION})', checkpoint_average_loss, latest, True) # Let's visualize the distribution of the loss coil_logger.add_histogram( f'Validation Checkpoint Loss ({g_conf.LOSS_FUNCTION})', checkpoint_average_loss, latest) if checkpoint_average_loss < best_loss: best_loss = checkpoint_average_loss best_loss_iter = latest coil_logger.add_message( 'Iterating', { 'Summary': { 'Loss': checkpoint_average_loss, 'BestLoss': best_loss, 'BestLossCheckpoint': best_loss_iter }, 'Checkpoint': latest }, latest) l1_window.append(checkpoint_average_loss) coil_logger.write_on_error_csv(validation_dataset, checkpoint_average_loss, latest) # If we are using the finish when validation stops, we check the current checkpoint if g_conf.FINISH_ON_VALIDATION_STALE is not None: if dlib.count_steps_without_decrease(l1_window) > 3 and \ dlib.count_steps_without_decrease_robust(l1_window) > 3: coil_logger.write_stop(validation_dataset, latest) break else: latest = get_latest_evaluated_checkpoint() time.sleep(1) coil_logger.add_message('Loading', {'Message': 'Waiting Checkpoint'}) print("Waiting for the next Validation") print('\n' + 20 * '#') print('Finished validation!') print(20 * '#') coil_logger.add_message('Finished', {}) except KeyboardInterrupt: coil_logger.add_message('Error', {'Message': 'Killed By User'}) # We erase the output that was unfinished due to some process stop. if latest is not None: coil_logger.erase_csv(latest) except RuntimeError as e: if latest is not None: coil_logger.erase_csv(latest) coil_logger.add_message('Error', {'Message': str(e)}) except: traceback.print_exc() coil_logger.add_message('Error', {'Message': 'Something Happened'}) # We erase the output that was unfinished due to some process stop. if latest is not None: coil_logger.erase_csv(latest)
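# --- Illustration: the validation loop above averages the loss with the incremental-mean update
# credited to John D. Cook, so no history of values has to be stored. Minimal standalone version:
def incremental_mean(values):
    """m_k = m_{k-1} + (x_k - m_{k-1}) / k, equivalent to sum(values) / len(values)."""
    mean = 0.0
    for k, x in enumerate(values, start=1):
        mean += (x - mean) / k
    return mean

assert abs(incremental_mean([1.0, 2.0, 3.0, 4.0]) - 2.5) < 1e-9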
def execute(gpu, exp_batch, exp_alias): # We set the visible cuda devices os.environ["CUDA_VISIBLE_DEVICES"] = gpu # At this point the log file with the correct naming is created. merge_with_yaml(os.path.join(exp_batch, exp_alias + '.yaml')) set_type_of_process('validation') sys.stdout = open(str(os.getpid()) + ".out", "a", buffering=1) if monitorer.get_status(exp_batch, exp_alias, g_conf.PROCESS_NAME)[0] == "Finished": # TODO: print some cool summary or not ? return #Define the dataset. This structure is has the __get_item__ redefined in a way #that you can access the HDFILES positions from the root directory as a in a vector. full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.DATASET_NAME) dataset = CoILDataset(full_dataset, transform=transforms.Compose([transforms.ToTensor() ])) # Creates the sampler, this part is responsible for managing the keys. It divides # all keys depending on the measurements and produces a set of keys for each bach. # The data loader is the multi threaded module from pytorch that release a number of # workers to get all the data. # TODO: batch size an number of workers go to some configuration file data_loader = torch.utils.data.DataLoader(dataset, batch_size=120, shuffle=False, num_workers=12, pin_memory=True) # TODO: here there is clearly a posibility to make a cool "conditioning" system. model = CoILModel(g_conf.MODEL_NAME) model.cuda() # TODO: The checkpoint will continue, so the logs should restart ??? OR continue were it was latest = get_latest_evaluated_checkpoint() if latest is None: # When nothing was tested, get latest returns none, we fix that. latest = 0 print(dataset.meta_data) while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE): if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE): latest = get_next_checkpoint(g_conf.TEST_SCHEDULE) checkpoint = torch.load( os.path.join('_logs', exp_batch, exp_alias, 'checkpoints', str(latest) + '.pth')) checkpoint_iteration = checkpoint['iteration'] print("Validation loaded ", checkpoint_iteration) for data in data_loader: input_data, labels = data control_position = np.where( dataset.meta_data[:, 0] == 'control')[0][0] speed_position = np.where( dataset.meta_data[:, 0] == 'speed_module')[0][0] print(torch.squeeze(input_data['rgb']).shape) print(control_position) print(speed_position) # Obs : Maybe we could also check for other branches ?? output = model.forward_branch( torch.squeeze(input_data['rgb']).cuda(), labels[:, speed_position, :].cuda(), labels[:, control_position, :].cuda()) # TODO: clean this squeeze and dimension things for i in range(input_data['rgb'].shape[0]): coil_logger.write_on_csv( checkpoint_iteration, [output[i][0], output[i][1], output[i][2]]) #loss = criterion(output, labels) #loss.backward() #optimizer.step() #shutil.copyfile(filename, 'model_best.pth.tar') else: time.sleep(1) print("Waiting for the next Validation")
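# --- Illustration: the older validation script above looks label columns up by name through
# `dataset.meta_data`. The layout below is made up purely to show how the np.where lookup works.
import numpy as np

meta_data = np.array([['steer', '0'], ['throttle', '1'],
                      ['control', '24'], ['speed_module', '10']])  # hypothetical layout

control_position = np.where(meta_data[:, 0] == 'control')[0][0]
speed_position = np.where(meta_data[:, 0] == 'speed_module')[0][0]
print(control_position, speed_position)  # 2 3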
class AffordancesAgent(object): def __init__(self, path_to_config_file): # params for now it is not used but we might want to use this to set self.setup(path_to_config_file) self.save_attentions = False def setup(self, path_to_config_file): self._agent = None self.route_assigned = False self.count = 0 exp_dir = os.path.join( '/', os.path.join(*path_to_config_file.split('/')[:-1])) yaml_conf, checkpoint_number, agent_name, encoder_params = checkpoint_parse_configuration_file( path_to_config_file) if encoder_params == "None": encoder_params = None g_conf.immutable(False) merge_with_yaml( os.path.join('/', os.path.join(*path_to_config_file.split('/')[:-4]), yaml_conf), encoder_params) if g_conf.MODEL_TYPE in ['one-step-affordances']: # one step training, no need to retrain FC layers, we just get the output of encoder model as prediciton self._model = EncoderModel(g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION) self.checkpoint = torch.load( os.path.join(exp_dir, 'checkpoints', str(checkpoint_number) + '.pth')) print("Affordances Model ", str(checkpoint_number) + '.pth', "loaded from ", os.path.join(exp_dir, 'checkpoints')) self._model.load_state_dict(self.checkpoint['state_dict']) self._model.cuda() self._model.eval() elif g_conf.MODEL_TYPE in ['separate-affordances']: if encoder_params is not None: self.encoder_model = EncoderModel( g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION) self.encoder_model.cuda() # Here we load the pre-trained encoder (not fine-tunned) if g_conf.FREEZE_ENCODER: encoder_checkpoint = torch.load( os.path.join( os.path.join( '/', os.path.join( *path_to_config_file.split('/')[:-4])), '_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints', str(encoder_params['encoder_checkpoint']) + '.pth')) print( "Encoder model ", str(encoder_params['encoder_checkpoint']), "loaded from ", os.path.join('_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints')) self.encoder_model.load_state_dict( encoder_checkpoint['state_dict']) self.encoder_model.eval() for param_ in self.encoder_model.parameters(): param_.requires_grad = False else: encoder_checkpoint = torch.load( os.path.join(exp_dir, 'checkpoints', str(checkpoint_number) + '_encoder.pth')) print("FINE TUNNED encoder model ", str(checkpoint_number) + '_encoder.pth', "loaded from ", os.path.join(exp_dir, 'checkpoints')) self.encoder_model.load_state_dict( encoder_checkpoint['state_dict']) self.encoder_model.eval() for param_ in self.encoder_model.parameters(): param_.requires_grad = False else: raise RuntimeError( 'encoder_params can not be None in MODEL_TYPE --> separate-affordances' ) self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION, g_conf.ENCODER_MODEL_CONFIGURATION) self.checkpoint = torch.load( os.path.join(exp_dir, 'checkpoints', str(checkpoint_number) + '.pth')) print("Affordances Model ", str(checkpoint_number) + '.pth', "loaded from ", os.path.join(exp_dir, 'checkpoints')) self._model.load_state_dict(self.checkpoint['state_dict']) self._model.cuda() self._model.eval() def get_sensors_dict(self): """ The agent sets the sensors that it is going to use. That has to be set into the environment for it to produce this data. """ sensors_dict = [{ 'type': 'sensor.camera.rgb', 'x': 2.0, 'y': 0.0, 'z': 1.40, 'roll': 0.0, 'pitch': -15.0, 'yaw': 0.0, 'width': 800, 'height': 600, 'fov': 100, 'id': 'rgb_central' }] return sensors_dict # TODO we set the sensors here directly. 
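# --- Illustration (not the repo's EncoderModel): when FREEZE_ENCODER is set, the pre-trained
# encoder above is used as a fixed feature extractor. A minimal sketch of that pattern, with a
# tiny stand-in module and an illustrative checkpoint path:
import torch
import torch.nn as nn

encoder = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU())  # stand-in for EncoderModel
# encoder.load_state_dict(torch.load('encoder.pth')['state_dict'])  # path is illustrative
encoder.eval()                      # inference mode (dropout / batchnorm statistics frozen)
for p in encoder.parameters():
    p.requires_grad = False         # no gradients flow into the encoder

with torch.no_grad():
    z = encoder(torch.randn(1, 3, 88, 200))  # bottleneck features for the affordance heads
print(z.shape)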
def sensors(self): return self._sensors_dict def get_state(self, exp_list, target_speed=20.0): """ Based on the exp object it makes all the affordances. :param exp: :return: """ exp = exp_list[0] self._vehicle = exp._ego_actor if self._agent is None: self._agent = True self._state = AgentState.NAVIGATING args_lateral_dict = { 'K_P': 1, 'K_D': 0.02, 'K_I': 0, 'dt': 1.0 / 20.0 } self._local_planner = LocalPlanner(self._vehicle, opt_dict={ 'target_speed': target_speed, 'lateral_control_dict': args_lateral_dict }) self._hop_resolution = 2.0 self._path_seperation_hop = 2 self._path_seperation_threshold = 0.5 self._grp = None if not self.route_assigned: plan = [] for transform, road_option in exp._route: wp = exp._ego_actor.get_world().get_map().get_waypoint( transform.location) plan.append((wp, road_option)) self._local_planner.set_global_plan(plan) self.route_assigned = True input_data = exp._sensor_interface.get_data() input_data = self._process_sensors( input_data['rgb_central'][1]) #torch.Size([1, 3, 88, 200] if g_conf.MODEL_TYPE in ['one-step-affordances']: c_output, r_output, layers = self._model.forward_outputs( input_data.cuda(), torch.cuda.FloatTensor( [exp._forward_speed / g_conf.SPEED_FACTOR]).unsqueeze(0), torch.cuda.FloatTensor(encode_directions( exp._directions)).unsqueeze(0)) elif g_conf.MODEL_TYPE in ['separate-affordances']: if g_conf.ENCODER_MODEL_TYPE in [ 'action_prediction', 'stdim', 'ETEDIM', 'FIMBC', 'one-step-affordances' ]: e, layers = self.encoder_model.forward_encoder( input_data.cuda(), torch.cuda.FloatTensor([ exp._forward_speed / g_conf.SPEED_FACTOR ]).unsqueeze(0), torch.cuda.FloatTensor(encode_directions( exp._directions)).unsqueeze(0)) c_output, r_output = self._model.forward_test(e) elif g_conf.ENCODER_MODEL_TYPE in [ 'ETE', 'ETE_inverse_model', 'forward', 'ETE_stdim' ]: e, layers = self.encoder_model.forward_encoder( input_data.cuda(), torch.cuda.FloatTensor([ exp._forward_speed / g_conf.SPEED_FACTOR ]).unsqueeze(0), torch.cuda.FloatTensor(encode_directions( exp._directions)).unsqueeze(0)) c_output, r_output = self._model.forward_test(e) if self.save_attentions: exp_params = exp._exp_params attentions_full_path = os.path.join( os.environ["SRL_DATASET_PATH"], exp_params['package_name'], exp_params['env_name'], str(exp_params['env_number']) + '_' + exp._agent_name, str(exp_params['exp_number'])) save_attentions(input_data.cuda(), layers, self.count, attentions_full_path, save_input=False, big_size=False) self.count += 1 affordances = {} output_relative_angle = torch.squeeze( r_output[0]).cpu().detach().numpy() * 1.0 is_pedestrian_hazard = False if c_output[0][0, 0] < c_output[0][0, 1]: is_pedestrian_hazard = True is_red_tl_hazard = False if c_output[1][0, 0] < c_output[1][0, 1]: is_red_tl_hazard = True is_vehicle_hazard = False if (c_output[2][0, 0] < c_output[2][0, 1]): is_vehicle_hazard = True affordances.update({'is_pedestrian_hazard': is_pedestrian_hazard}) affordances.update({'is_red_tl_hazard': is_red_tl_hazard}) affordances.update({'is_vehicle_hazard': is_vehicle_hazard}) affordances.update({'relative_angle': output_relative_angle}) # Now we consider all target speed to be 20.0 affordances.update({'target_speed': target_speed}) #affordances.update({'GT_is_pedestrian_hazard': }) #affordances.update({'GT_is_red_tl_hazard': }) #affordances.update({'GT_is_vehicle_hazard': }) gt_relative_angle = compute_relative_angle( self._vehicle, self._local_planner.get_target_waypoint()) affordances.update({'GT_relative_angle': gt_relative_angle}) affordances.update({ 
'ERROR_relative_angle': output_relative_angle - gt_relative_angle }) return affordances def make_reward(self, exp): # Just basically return None since the reward is not used for a non return None def step(self, affordances): hazard_detected = False is_vehicle_hazard = affordances['is_vehicle_hazard'] is_red_tl_hazard = affordances['is_red_tl_hazard'] is_pedestrian_hazard = affordances['is_pedestrian_hazard'] relative_angle = affordances['relative_angle'] target_speed = affordances['target_speed'] # once we meet a speed limit sign, the target speed changes #if target_speed != self._local_planner._target_speed: # self._local_planner.set_speed(target_speed) #forward_speed = affordances['forward_speed'] if is_vehicle_hazard: self._state = AgentState.BLOCKED_BY_VEHICLE hazard_detected = True if is_red_tl_hazard: self._state = AgentState.BLOCKED_RED_LIGHT hazard_detected = True if is_pedestrian_hazard: self._state = AgentState.BLOCKED_BY_PEDESTRIAN hazard_detected = True if hazard_detected: control = self.emergency_stop() else: self._state = AgentState.NAVIGATING control = self._local_planner.run_step(relative_angle, target_speed) logging.debug("Output %f %f %f " % (control.steer, control.throttle, control.brake)) return control def reinforce(self, rewards): """ This agent cannot learn so there is no reinforce """ pass def reset(self): print(" Correctly reseted the agent") self.route_assigned = False self._agent = None self.count = 0 def emergency_stop(self): """ Send an emergency stop command to the vehicle :return: """ control = carla.VehicleControl() control.steer = 0.0 control.throttle = 0.0 control.brake = 1.0 control.hand_brake = False return control def _process_sensors(self, sensor): sensor = sensor[:, :, 0:3] # BGRA->BRG drop alpha channel sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], :, :] # crop sensor = scipy.misc.imresize(sensor, (g_conf.SENSORS['rgb_central'][1], g_conf.SENSORS['rgb_central'][2])) self.latest_image = sensor sensor = np.swapaxes(sensor, 0, 1) sensor = np.transpose(sensor, (2, 1, 0)) sensor = torch.from_numpy(sensor / 255.0).type( torch.FloatTensor).cuda() image_input = sensor.unsqueeze(0) self.latest_image_tensor = image_input return image_input
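# --- Illustration: each classification head in `c_output` above has two logits (clear vs hazard),
# and the comparisons in get_state are an argmax over them. A minimal, hedged re-statement:
import torch

def decode_hazards(c_output):
    """c_output: list of three [1, 2] tensors for the pedestrian / red-light / vehicle heads."""
    keys = ['is_pedestrian_hazard', 'is_red_tl_hazard', 'is_vehicle_hazard']
    # class 1 ("hazard") wins whenever its logit exceeds class 0 ("clear")
    return {k: bool(torch.argmax(head[0]).item() == 1) for k, head in zip(keys, c_output)}

heads = [torch.tensor([[0.2, 0.8]]), torch.tensor([[0.9, 0.1]]), torch.tensor([[0.4, 0.6]])]
print(decode_hazards(heads))
# {'is_pedestrian_hazard': True, 'is_red_tl_hazard': False, 'is_vehicle_hazard': True}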
def execute(gpu, exp_batch, exp_alias, suppress_output=True, number_of_workers=12): """ The main training function. This functions loads the latest checkpoint for a given, exp_batch (folder) and exp_alias (experiment configuration). With this checkpoint it starts from the beginning or continue some training. Args: gpu: gpus ids for training exp_batch: the folder with the experiments exp_alias: the alias, experiment name suppress_output: if the output are going to be saved on a file number_of_workers: the number of threads used for data loading Returns: None """ try: os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(gpu) g_conf.VARIABLE_WEIGHT = {} # At this point the log file with the correct naming is created. # You merge the yaml file with the global configuration structure. merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml')) set_type_of_process('train') # Set the process into loading status. coil_logger.add_message('Loading', {'GPU': gpu}) # Put the output to a separate file if it is the case if suppress_output: if not os.path.exists('_output_logs'): os.mkdir('_output_logs') sys.stdout = open(os.path.join( '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) sys.stderr = open(os.path.join( '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"), "a", buffering=1) if coil_logger.check_finish('train'): coil_logger.add_message('Finished', {}) return # Preload option if g_conf.PRELOAD_MODEL_ALIAS is not None: checkpoint = torch.load( os.path.join('_logs', g_conf.PRELOAD_MODEL_BATCH, g_conf.PRELOAD_MODEL_ALIAS, 'checkpoints', str(g_conf.PRELOAD_MODEL_CHECKPOINT) + '.pth')) # Get the latest checkpoint to be loaded # returns none if there are no checkpoints saved for this model checkpoint_file = get_latest_saved_checkpoint() if checkpoint_file is not None: checkpoint = torch.load( os.path.join('_logs', exp_batch, exp_alias, 'checkpoints', str(get_latest_saved_checkpoint()))) iteration = checkpoint['iteration'] best_loss = checkpoint['best_loss'] best_loss_iter = checkpoint['best_loss_iter'] print('iteration: ', iteration, 'best_loss: ', best_loss) else: iteration = 0 best_loss = 10000.0 best_loss_iter = 0 # Define the dataset. This structure is has the __get_item__ redefined in a way # that you can access the positions from the root directory as a in a vector. full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.TRAIN_DATASET_NAME) # By instantiating the augmenter we get a callable that augment images and transform them into tensors. augmenter = Augmenter(g_conf.AUGMENTATION) # Instantiate the class used to read the dataset dataset = CoILDataset(full_dataset, transform=augmenter, preload_name=str(g_conf.NUMBER_OF_HOURS) + 'hours_' + g_conf.TRAIN_DATASET_NAME) print("Loaded dataset") # Creates the sampler, this part is responsible for managing the keys. It divides # all keys depending on the measurements and produces a set of keys for each bach. 
# define the sampling strategy for mini-batch, different samplers can be found in 'splitter.py' data_loader = select_balancing_strategy(dataset, iteration, number_of_workers) # Instatiate the network architecture model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION) model.cuda() optimizer = optim.Adam(model.parameters(), lr=g_conf.LEARNING_RATE ) # adabound and adamio can also be used here if checkpoint_file is not None or g_conf.PRELOAD_MODEL_ALIAS is not None: model.load_state_dict(checkpoint['state_dict']) optimizer.load_state_dict(checkpoint['optimizer']) accumulated_time = checkpoint['total_time'] loss_window = coil_logger.recover_loss_window('train', iteration) else: # We accumulate iteration time and keep the average speed accumulated_time = 0 loss_window = [] # freeze the perception module weights if required # for m in model.perception.parameters(): # m.requires_grad = False # total trainable parameters model_parameters = filter(lambda p: p.requires_grad, model.parameters()) total_params = sum([np.prod(p.size()) for p in model_parameters]) print('trainable parameters: ', total_params) # multi-gpu print('number of gpus: ', torch.cuda.device_count()) if torch.cuda.device_count() > 1: model = nn.DataParallel(model) criterion = Loss(g_conf.LOSS_FUNCTION) print('Start Training') st = time.time() for data in data_loader: # use this for early stopping if the validation loss is not coming down if g_conf.FINISH_ON_VALIDATION_STALE is not None and \ check_loss_validation_stopped(iteration, g_conf.FINISH_ON_VALIDATION_STALE): break """ #################################### Main optimization loop #################################### """ iteration += 1 if iteration % 1000 == 0: adjust_learning_rate_auto(optimizer, loss_window) # additional learning rate scheduler - cyclic cosine annealing (https://arxiv.org/pdf/1704.00109.pdf) # adjust_learning_rate_cosine_annealing(optimizer, loss_window, iteration) capture_time = time.time() controls = data['directions'] model.zero_grad() branches = model(torch.squeeze(data['rgb'].cuda()), dataset.extract_inputs(data).cuda()) loss_function_params = { 'branches': branches, 'targets': dataset.extract_targets(data).cuda(), 'controls': controls.cuda(), 'inputs': dataset.extract_inputs(data).cuda(), 'branch_weights': g_conf.BRANCH_LOSS_WEIGHT, 'variable_weights': g_conf.VARIABLE_WEIGHT } loss, _ = criterion(loss_function_params) loss.backward() optimizer.step() """ #################################### Saving the model if necessary #################################### """ if is_ready_to_save(iteration): if torch.cuda.device_count() > 1: state_dict_save = model.module.state_dict() else: state_dict_save = model.state_dict() state = { 'iteration': iteration, 'state_dict': state_dict_save, 'best_loss': best_loss, 'total_time': accumulated_time, 'optimizer': optimizer.state_dict(), 'best_loss_iter': best_loss_iter } torch.save( state, os.path.join('_logs', exp_batch, exp_alias, 'checkpoints', str(iteration) + '.pth')) """ ################################################ Adding tensorboard logs. Making calculations for logging purposes. These logs are monitored by the printer module. 
################################################# """ coil_logger.add_scalar('Loss', loss.data, iteration) coil_logger.add_image('Image', torch.squeeze(data['rgb']), iteration) if loss.data < best_loss: best_loss = loss.data.tolist() best_loss_iter = iteration # Log a random position position = random.randint(0, len(data) - 1) if torch.cuda.device_count() > 1: output = model.module.extract_branch( torch.stack(branches[0:4]), controls) else: output = model.extract_branch(torch.stack(branches[0:4]), controls) error = torch.abs(output - dataset.extract_targets(data).cuda()) accumulated_time += time.time() - capture_time coil_logger.add_message( 'Iterating', { 'Iteration': iteration, 'Loss': loss.data.tolist(), 'Images/s': (iteration * g_conf.BATCH_SIZE) / accumulated_time, 'BestLoss': best_loss, 'BestLossIteration': best_loss_iter, 'Output': output[position].data.tolist(), 'GroundTruth': dataset.extract_targets(data)[position].data.tolist(), 'Error': error[position].data.tolist(), 'Inputs': dataset.extract_inputs(data)[position].data.tolist() }, iteration) loss_window.append(loss.data.tolist()) coil_logger.write_on_error_csv('train', loss.data) print("Iteration: %d Loss: %f" % (iteration, loss.data)) st = time.time() coil_logger.add_message('Finished', {}) except KeyboardInterrupt: coil_logger.add_message('Error', {'Message': 'Killed By User'}) except RuntimeError as e: coil_logger.add_message('Error', {'Message': str(e)}) except: traceback.print_exc() coil_logger.add_message('Error', {'Message': 'Something Happened'})
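# --- Illustration: with nn.DataParallel the parameters live on `model.module`, which is why the
# training loop above saves `model.module.state_dict()`. Saving the un-prefixed weights lets the
# checkpoint load into a single-GPU model without "module." key mismatches. Minimal sketch:
import torch
import torch.nn as nn

net = nn.Linear(4, 2)
if torch.cuda.device_count() > 1:
    net = nn.DataParallel(net)

to_save = net.module.state_dict() if isinstance(net, nn.DataParallel) else net.state_dict()
torch.save({'state_dict': to_save}, '/tmp/ckpt.pth')   # illustrative path

fresh = nn.Linear(4, 2)                                 # plain, unwrapped model
fresh.load_state_dict(torch.load('/tmp/ckpt.pth')['state_dict'])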
# for testing and debugging if __name__ == '__main__': merge_with_yaml(os.path.join('configs/nocrash/resnet34imnet10S1.yaml')) print(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION) model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION) for m in model.perception.parameters(): print(type(m), m.requires_grad) checkpoint = torch.load( '/is/sg2/aprakash/Projects/carla_autonomous_driving/code/coiltraine/_logs/dagger/resnet34imnet10S1/checkpoints/600000.pth', map_location='cpu') model.load_state_dict(checkpoint['state_dict']) print('checkpoint loaded') for m in model.perception.parameters(): m.requires_grad = False print(type(m), m.requires_grad)
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus augmenter = Augmenter(None) dataset = CoILDataset(args.dataset_path, transform=augmenter, preload_name=args.preload_name) dataloader = torch.utils.data.DataLoader( dataset, batch_size=g_conf.BATCH_SIZE, shuffle=False, num_workers=g_conf.NUMBER_OF_LOADING_WORKERS, pin_memory=True) model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION) model = model.cuda() checkpoint = torch.load(args.checkpoint) model.load_state_dict(checkpoint['state_dict']) model.eval() print(len(dataset)) save_dir = os.path.join(args.gradcam_path, args.type) if not os.path.isdir(save_dir): os.mkdir(save_dir) count = 0 for data in dataloader:
class CoILBaselineCEXP(Agent): def setup(self, path_to_config_file): yaml_conf, checkpoint_number, agent_name, encoder_params = checkpoint_parse_configuration_file( path_to_config_file) # Take the checkpoint name and load it if encoder_params is not None: self.checkpoint = torch.load( os.path.join( '/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]), '_logs', yaml_conf.split('/')[-2], yaml_conf.split('/')[-1].split('.')[-2] + '_' + str(encoder_params['encoder_checkpoint']), 'checkpoints', str(checkpoint_number) + '.pth')) # Once the ENCODER_MODEL_CONFIGURATION was defined, we use the pre-trained encoder model to extract bottleneck Z and drive the E-t-E agent self.encoder_checkpoint = torch.load( os.path.join( '/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]), '_logs', encoder_params['encoder_folder'], encoder_params['encoder_exp'], 'checkpoints', str(encoder_params['encoder_checkpoint']) + '.pth')) self.encoder_model = CoILModel(g_conf.ENCODER_MODEL_TYPE, g_conf.ENCODER_MODEL_CONFIGURATION) self.encoder_model.load_state_dict( self.encoder_checkpoint['state_dict']) self.encoder_model.cuda() self.encoder_model.eval() else: self.checkpoint = torch.load( os.path.join( '/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]), '_logs', yaml_conf.split('/')[-2], yaml_conf.split('/')[-1].split('.')[-2], 'checkpoints', str(checkpoint_number) + '.pth')) # do the merge here # TODO THE MERGE IS REQUIRED DEPENDING ON THE SITUATION g_conf.immutable(False) merge_with_yaml( os.path.join( '/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]), yaml_conf), encoder_params) self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION, g_conf.ENCODER_MODEL_CONFIGURATION) self.first_iter = True logging.info("Setup Model") # Load the model and prepare set it for evaluation self._model.load_state_dict(self.checkpoint['state_dict']) self._model.cuda() self._model.eval() self.latest_image = None self.latest_image_tensor = None # We add more time to the curve commands self._expand_command_front = 5 self._expand_command_back = 3 # TODO: Merge with Felipe's code self._msn = None self._lat_ref = 0 self._lon_ref = 0 # Check the agent name self._name = agent_name self.count = 0 def sensors(self): sensors = [{ 'type': 'sensor.camera.rgb', 'x': 2.0, 'y': 0.0, 'z': 1.40, 'roll': 0.0, 'pitch': -15.0, 'yaw': 0.0, 'width': 800, 'height': 600, 'fov': 100, 'id': 'rgb' }, { 'type': 'sensor.can_bus', 'reading_frequency': 25, 'id': 'can_bus' }, { 'type': 'sensor.other.gnss', 'x': 0.7, 'y': -0.4, 'z': 1.60, 'id': 'GPS' }] return sensors """ def make_state(self, exp): # state is divided in three parts, the speed, the angle_error, the high level command # Get the closest waypoint #waypoint, _ = self._get_current_wp_direction(exp._ego_actor.get_transform().location, exp._route) #norm, angle = compute_magnitude_angle(waypoint.location, exp._ego_actor.get_transform().location, # exp._ego_actor.get_transform().rotation.yaw) #return np.array([_get_forward_speed(exp._ego_actor) / 12.0, # Normalize to by dividing by 12 # angle / 180.0]) self._global_plan = exp._route input_data = exp._sensor_interface.get_data() # TODO this should be capilarized #if 'scenario' in g_conf.MEASUREMENTS_INPUTS: # scenario = convert_scenario_name_number(exp._environment_data['exp_measurements']) # input_data.update({'scenario': scenario}) return input_data """ def make_state(self, exp): """ This function also do the necessary processing of the state for the run step function :param exp: :return: """ self._global_plan 
= exp._route # we also need to get the latitute longitude ref # TODO this needs to be adpated for a CARLA challenge submission self._lat_ref = exp._lat_ref self._lon_ref = exp._lon_ref input_data = exp._sensor_interface.get_data() self._vehicle_pos = exp._ego_actor.get_transform().location # TODO this should be capilarized input_data.update( {'sensor_input': self._process_sensors(input_data['rgb'][1])}) if self._msn is not None: input_data.update( {'scenario': self._msn(input_data['sensor_input'])}) #if 'scenario' in g_conf.MEASUREMENTS_INPUTS: # scenario = convert_scenario_name_number(exp._environment_data['exp_measurements']) # print (" SCENARIO NUMBER ", scenario) # input_data.update({'scenario': scenario}) return input_data def run_step(self, input_data): # Get the current directions for following the route directions = self._get_current_direction(self._vehicle_pos) logging.debug(" Current direction %f ", directions) # Take the forward speed and normalize it for it to go from 0-1 network_input = input_data['can_bus'][1]['speed'] / g_conf.SPEED_FACTOR network_input = torch.cuda.FloatTensor([network_input]).unsqueeze(0) # TODO remove ifs #if 'scenario' in g_conf.MEASUREMENTS_INPUTS: # network_input = torch.cat((torch.cuda.FloatTensor([input_data['scenario']]), # network_input), 1) # Compute the forward pass processing the sensors got from CARLA. # TODO we start with an if but we can build a class hierarquical ! if g_conf.MODEL_TYPE in [ 'coil-icra', 'coil-icra-KLD', 'separate-supervised' ]: directions_tensor = torch.cuda.LongTensor([directions]) #print(" Directions ", int(directions)) if False: save_path = os.path.join('temp', 'ete_baseline') if not os.path.exists(save_path): os.mkdir(save_path) save_image( input_data['sensor_input'], os.path.join( save_path, 'run_input_' + str(self.count).zfill(5) + ".png")) self.count += 1 model_outputs = self._model.forward_branch( input_data['sensor_input'], network_input, directions_tensor) elif g_conf.MODEL_TYPE in ['coil-icra-VAE']: directions_tensor = torch.cuda.LongTensor([directions]) if g_conf.ENCODER_MODEL_TYPE in ['VAE']: if g_conf.LABELS_SUPERVISED: input = torch.cat( (input_data['sensor_input'], torch.zeros( 1, 1, 88, 200).cuda()), dim=1) recon_x, mu, _, z = self.encoder_model(input) else: recon_x, mu, _, z = self.encoder_model( input_data['sensor_input']) elif g_conf.ENCODER_MODEL_TYPE in ['Affordances']: mu, _ = self.encoder_model(input_data['sensor_input']) if False: save_path = os.path.join('temp', 'affordances_upperbound') if not os.path.exists(save_path): os.mkdir(save_path) if g_conf.LABELS_SUPERVISED: save_image( input_data['sensor_input'], os.path.join( save_path, 'run_input_' + str(self.count).zfill(5) + ".png")) split = torch.split(torch.squeeze(recon_x, dim=1), [3, 1], dim=1) save_image( split[0], os.path.join( save_path, 'run_recon_rgb_' + str(self.count).zfill(5) + ".png")) save_image( split[1], os.path.join( save_path, 'run_recon_labels_' + str(self.count).zfill(5) + ".png")) else: save_image( input_data['sensor_input'], os.path.join( save_path, 'run_input_' + str(self.count).zfill(5) + ".png")) #save_image(recon_x, os.path.join(save_path, 'run_recon_' + str(self.count).zfill(5) + ".png")) self.count += 1 model_outputs = self._model.forward_branch(mu, network_input, directions_tensor) #print(' frame', self.count) #print(' direction', directions_tensor) #print(' branch output', model_outputs) elif g_conf.MODEL_TYPE in [ 'separate-supervised-NoSpeed', 'coil-icra-NoSpeed' ]: directions_tensor = 
torch.cuda.LongTensor([directions]) if False: save_path = os.path.join('temp', 'ETE_resnet34_6') if not os.path.exists(save_path): os.mkdir(save_path) save_image( input_data['sensor_input'], os.path.join( save_path, 'run_input_' + str(self.count).zfill(5) + ".png")) self.count += 1 model_outputs = self._model.forward_branch( input_data['sensor_input'], directions_tensor) else: directions_tensor = torch.cuda.FloatTensor( encode_directions(directions)) model_outputs = self._model.forward( self._process_sensors(input_data['rgb'][1]), network_input, directions_tensor)[0] steer, throttle, brake = self._process_model_outputs(model_outputs[0]) control = carla.VehicleControl() control.steer = float(steer) control.throttle = float(throttle) control.brake = float(brake) logging.debug("Output %f %f %f " % (control.steer, control.throttle, control.brake)) if self.first_iter: coil_logger.add_message('Iterating', { "Checkpoint": self.checkpoint['iteration'], 'Agent': str(steer) }, self.checkpoint['iteration']) # There is the posibility to replace some of the predictions with oracle predictions. self.first_iter = False #print(['steer: ', control.steer, 'throttle: ', control.throttle, 'brake: ', control.brake]) return control def get_attentions(self, layers=None): """ Returns The activations obtained from the first layers of the latest iteration. """ if layers is None: layers = [0, 1, 2] if self.latest_image_tensor is None: raise ValueError( 'No step was ran yet. ' 'No image to compute the activations, Try Running ') all_layers = self._model.get_perception_layers( self.latest_image_tensor) cmap = plt.get_cmap('inferno') attentions = [] for layer in layers: y = all_layers[layer] att = torch.abs(y).mean(1)[0].data.cpu().numpy() att = att / att.max() att = cmap(att) att = np.delete(att, 3, 2) attentions.append(imresize(att, [88, 200])) return attentions def _process_sensors(self, sensor): sensor = sensor[:, :, 0:3] # BGRA->BRG drop alpha channel sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], :, :] # crop sensor = scipy.misc.imresize(sensor, (g_conf.SENSORS['rgb_central'][1], g_conf.SENSORS['rgb_central'][2])) self.latest_image = sensor sensor = np.swapaxes(sensor, 0, 1) sensor = np.transpose(sensor, (2, 1, 0)) sensor = torch.from_numpy(sensor / 255.0).type( torch.FloatTensor).cuda() image_input = sensor.unsqueeze(0) self.latest_image_tensor = image_input return image_input def _get_current_direction(self, vehicle_position): #print(" number of waypoints in global plan:", len(self._global_plan)) # for the current position and orientation try to get the closest one from the waypoints closest_id = 0 min_distance = 100000 for index in range(len(self._global_plan)): waypoint = self._global_plan[index][0] computed_distance = distance_vehicle(waypoint, vehicle_position) if computed_distance < min_distance: min_distance = computed_distance closest_id = index #print(" closest waypoint", closest_id) logging.debug("Closest waypoint {} dist {}".format( closest_id, min_distance)) direction = self._global_plan[closest_id][1] if direction == RoadOption.LEFT: direction = 3.0 elif direction == RoadOption.RIGHT: direction = 4.0 elif direction == RoadOption.STRAIGHT: direction = 5.0 else: direction = 2.0 return direction def _process_model_outputs(self, outputs): """ A bit of heuristics in the control, to eventually make car faster, for instance. 
Returns: """ steer, throttle, brake = outputs[0], outputs[1], outputs[2] if brake < 0.05: brake = 0.0 if throttle > brake: brake = 0.0 return steer, throttle, brake def _expand_commands(self, topological_plan): """ The idea is to make the intersection indications to last longer""" # O(2*N) algorithm , probably it is possible to do in O(N) with queues. # Get the index where curves start and end curves_start_end = [] inside = False start = -1 current_curve = RoadOption.LANEFOLLOW for index in range(len(topological_plan)): command = topological_plan[index][1] if command != RoadOption.LANEFOLLOW and not inside: inside = True start = index current_curve = command if command == RoadOption.LANEFOLLOW and inside: inside = False # End now is the index. curves_start_end.append([start, index, current_curve]) if start == -1: raise ValueError("End of curve without start") start = -1 for start_end_index_command in curves_start_end: start_index = start_end_index_command[0] end_index = start_end_index_command[1] command = start_end_index_command[2] # Add the backwards curves ( Before the begginning) for index in range(1, self._expand_command_front + 1): changed_index = start_index - index if changed_index > 0: topological_plan[changed_index] = ( topological_plan[changed_index][0], command) # add the onnes after the end for index in range(0, self._expand_command_back): changed_index = end_index + index if changed_index < len(topological_plan): topological_plan[changed_index] = ( topological_plan[changed_index][0], command) return topological_plan
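# --- Illustration: _expand_commands above stretches every turn indication a few plan entries
# backwards (_expand_command_front) and forwards (_expand_command_back) so the command reaches the
# network before the curve starts and lingers after it. The function below is a toy
# re-implementation of that idea (not the repo's code); it also mirrors the quirk that index 0 is
# never rewritten.
def expand_commands(plan, front=5, back=3, follow='F'):
    out = list(plan)
    i = 0
    while i < len(plan):
        if plan[i] != follow:
            cmd, start = plan[i], i
            while i < len(plan) and plan[i] == cmd:
                i += 1                                   # advance to the end of the curve
            for j in range(max(start - front, 1), start):
                out[j] = cmd                             # copy the command backwards
            for j in range(i, min(i + back, len(plan))):
                out[j] = cmd                             # and forwards
        else:
            i += 1
    return out

print(''.join(expand_commands(list('FFFFFLLFFF'))))  # FLLLLLLLLL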
class CoILBaseline(AutonomousAgent): def setup(self, path_to_config_file): yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(path_to_config_file) # Take the checkpoint name and load it checkpoint = torch.load(os.path.join(os.sep, os.path.join(*os.path.realpath(__file__).split(os.sep)[:-2]), '_logs', yaml_conf.split(os.sep)[-2], yaml_conf.split('/')[-1].split('.')[-2] , 'checkpoints', str(checkpoint_number) + '.pth')) # do the merge here merge_with_yaml(os.path.join(os.sep, os.path.join(*os.path.realpath(__file__).split(os.sep)[:-2]), yaml_conf)) self.checkpoint = checkpoint # We save the checkpoint for some interesting future use. self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION) self.first_iter = True logging.info("Setup Model") # Load the model and prepare set it for evaluation self._model.load_state_dict(checkpoint['state_dict']) self._model.cuda() self._model.eval() self.latest_image = None self.latest_image_tensor = None # We add more time to the curve commands self._expand_command_front = 5 self._expand_command_back = 3 self.track = 2 # Track.CAMERAS def sensors(self): sensors = [{'type': 'sensor.camera.rgb', 'x': 2.0, 'y': 0.0, 'z': 1.40, 'roll': 0.0, 'pitch': 0.0, 'yaw': 0.0, 'width': 800, 'height': 600, 'fov': 100, 'id': 'rgb'}, {'type': 'sensor.can_bus', 'reading_frequency': 25, 'id': 'can_bus' }, {'type': 'sensor.other.gnss', 'x': 0.7, 'y': -0.4, 'z': 1.60, 'id': 'GPS'} ] return sensors def run_step(self, input_data, timestamp): # Get the current directions for following the route directions = self._get_current_direction(input_data['GPS'][1]) logging.debug("Directions {}".format(directions)) # Take the forward speed and normalize it for it to go from 0-1 norm_speed = input_data['can_bus'][1]['speed'] / g_conf.SPEED_FACTOR norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0) directions_tensor = torch.cuda.LongTensor([directions]) # Compute the forward pass processing the sensors got from CARLA. model_outputs = self._model.forward_branch(self._process_sensors(input_data['rgb'][1]), norm_speed, directions_tensor) steer, throttle, brake = self._process_model_outputs(model_outputs[0]) control = carla.VehicleControl() control.steer = float(steer) control.throttle = float(throttle) control.brake = float(brake) logging.debug("Output ", control) # There is the posibility to replace some of the predictions with oracle predictions. self.first_iter = False return control def get_attentions(self, layers=None): """ Returns The activations obtained from the first layers of the latest iteration. """ if layers is None: layers = [0, 1, 2] if self.latest_image_tensor is None: raise ValueError('No step was ran yet. 
' 'No image to compute the activations, Try Running ') all_layers = self._model.get_perception_layers(self.latest_image_tensor) cmap = plt.get_cmap('inferno') attentions = [] for layer in layers: y = all_layers[layer] att = torch.abs(y).mean(1)[0].data.cpu().numpy() att = att / att.max() att = cmap(att) att = np.delete(att, 3, 2) attentions.append(scipy.misc.imresize(att, [88, 200])) return attentions def _process_sensors(self, sensor): sensor = sensor[:, :, 0:3] # BGRA->BRG drop alpha channel sensor = sensor[:, :, ::-1] # BGR->RGB # sensor = sensor[g_conf.IMAGE_CUT[0]:g_conf.IMAGE_CUT[1], :, :] # crop # TODO: don't cut sensor = scipy.misc.imresize(sensor, (g_conf.SENSORS['rgb'][1], g_conf.SENSORS['rgb'][2])) self.latest_image = sensor sensor = np.swapaxes(sensor, 0, 1) sensor = np.transpose(sensor, (2, 1, 0)) sensor = torch.from_numpy(sensor / 255.0).type(torch.FloatTensor).cuda() image_input = sensor.unsqueeze(0) self.latest_image_tensor = image_input return image_input def _get_current_direction(self, vehicle_position): # for the current position and orientation try to get the closest one from the waypoints closest_id = 0 min_distance = 100000 for index in range(len(self._global_plan)): waypoint = self._global_plan[index][0] computed_distance = distance_vehicle(waypoint, vehicle_position) if computed_distance < min_distance: min_distance = computed_distance closest_id = index print(f'Closest waypoint {closest_id} dist {min_distance}') direction = self._global_plan[closest_id][1] print("Direction ", direction) if direction == RoadOption.LEFT: direction = 3.0 elif direction == RoadOption.RIGHT: direction = 4.0 elif direction == RoadOption.STRAIGHT: direction = 5.0 else: direction = 2.0 return direction @staticmethod def _process_model_outputs(outputs): """ A bit of heuristics in the control, to eventually make car faster, for instance. Returns: """ steer, throttle, brake = outputs[0], outputs[1], outputs[2] if brake < 0.05: brake = 0.0 if throttle > brake: brake = 0.0 return steer, throttle, brake def _expand_commands(self, topological_plan): """ The idea is to make the intersection indications to last longer""" # O(2*N) algorithm , probably it is possible to do in O(N) with queues. # Get the index where curves start and end curves_start_end = [] inside = False start = -1 current_curve = RoadOption.LANEFOLLOW for index in range(len(topological_plan)): command = topological_plan[index][1] if command != RoadOption.LANEFOLLOW and not inside: inside = True start = index current_curve = command if command == RoadOption.LANEFOLLOW and inside: inside = False # End now is the index. curves_start_end.append([start, index, current_curve]) if start == -1: raise ValueError("End of curve without start") start = -1 for start_end_index_command in curves_start_end: start_index = start_end_index_command[0] end_index = start_end_index_command[1] command = start_end_index_command[2] # Add the backwards curves ( Before the begginning) for index in range(1, self._expand_command_front + 1): changed_index = start_index - index if changed_index > 0: topological_plan[changed_index] = (topological_plan[changed_index][0], command) # add the onnes after the end for index in range(0, self._expand_command_back): changed_index = end_index + index if changed_index < len(topological_plan): topological_plan[changed_index] = (topological_plan[changed_index][0], command) return topological_plan
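# --- Illustration: both agents above collapse the planner's RoadOption at the nearest waypoint
# into the scalar command the branched network expects (LEFT->3, RIGHT->4, STRAIGHT->5, anything
# else->2), and normalize the forward speed by g_conf.SPEED_FACTOR. Minimal sketch; the enum
# stand-in and its values are illustrative, only the names matter here.
from enum import Enum

class RoadOption(Enum):          # stand-in for the CARLA planner enum imported by the agent
    LEFT = 1
    RIGHT = 2
    STRAIGHT = 3
    LANEFOLLOW = 4

COMMANDS = {RoadOption.LEFT: 3.0, RoadOption.RIGHT: 4.0, RoadOption.STRAIGHT: 5.0}

def to_command(option):
    return COMMANDS.get(option, 2.0)   # anything that is not an explicit turn is lane-follow

def normalize_speed(speed_m_s, speed_factor=12.0):   # 12.0 is a placeholder for g_conf.SPEED_FACTOR
    return speed_m_s / speed_factor

print(to_command(RoadOption.LEFT), to_command(RoadOption.LANEFOLLOW), normalize_speed(6.0))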
def execute(gpu, exp_batch, exp_alias):
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join(exp_batch, exp_alias + '.yaml'))
    set_type_of_process('train')

    sys.stdout = open(str(os.getpid()) + ".out", "a", buffering=1)

    if monitorer.get_status(exp_batch, exp_alias, g_conf.PROCESS_NAME)[0] == "Finished":
        # TODO: print some cool summary or not?
        return

    # Define the dataset. This structure has __getitem__ redefined so that the HDF5 positions
    # can be accessed from the root directory as if they were in a vector.
    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], g_conf.DATASET_NAME)
    dataset = CoILDataset(full_dataset, transform=transforms.Compose([transforms.ToTensor()]))

    # Create the sampler. This part is responsible for managing the keys: it divides
    # all keys depending on the measurements and produces a set of keys for each batch.
    sampler = CoILSampler(splitter.control_steer_split(dataset.measurements, dataset.meta_data))

    # The data loader is the multi-threaded module from pytorch that releases a number of
    # workers to get all the data.
    # TODO: batch size and number of workers should go to some configuration file
    data_loader = torch.utils.data.DataLoader(dataset, sampler=sampler, batch_size=120,
                                              shuffle=False, num_workers=12, pin_memory=True)

    # By instantiating the augmenter we get a callable that augments images and transforms them
    # into tensors.
    augmenter = iag.Augmenter(g_conf.AUGMENTATION_SUITE)

    # TODO: here there is clearly a possibility to make a cool "conditioning" system.
    model = CoILModel(g_conf.MODEL_NAME)
    model.cuda()
    print(model)

    criterion = Loss()

    # TODO: DATASET SIZE SEEMS WEIRD
    optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)

    checkpoint_file = get_latest_saved_checkpoint()
    if checkpoint_file is not None:
        checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias,
                                             'checkpoints', str(get_latest_saved_checkpoint())))
        iteration = checkpoint['iteration']
    else:
        iteration = 0

    # TODO: The checkpoint will continue, so should the logs restart or continue where they were?

    print(dataset.meta_data)
    print(model)

    for data in data_loader:
        input_data, labels = data

        # TODO: we have to divide the input with other data.
        # TODO: ADD ITERATION SCHEDULE
        input_rgb_data = augmenter(0, input_data['rgb'])

        # get the control commands from labels, size = [120, 1]
        controls = labels[:, 24, :]

        # The output (branches) is a list of 5 branches; each branch has size [120, 3]
        model.zero_grad()
        branches = model(input_rgb_data, labels[:, 10, :].cuda())
        # print("len ", len(branches))

        # get the steer, gas and brake ground truth from labels
        steer_gt = labels[:, 0, :]
        gas_gt = labels[:, 1, :]
        brake_gt = labels[:, 2, :]
        speed_gt = labels[:, 10, :]

        targets = torch.cat([steer_gt, gas_gt, brake_gt], 1)

        loss = criterion.MSELoss(branches, targets.cuda(), controls.cuda(), speed_gt.cuda())
        loss.backward()
        optimizer.step()

        # TODO: save also the optimizer state dictionary
        if is_ready_to_save(iteration):
            state = {
                'iteration': iteration,
                'state_dict': model.state_dict()
            }
            # TODO: maybe already summarize the best model?
            torch.save(state, os.path.join('_logs', exp_batch, exp_alias,
                                           'checkpoints', str(iteration) + '.pth'))

        iteration += 1
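# Sketch for the "save also the optimizer state dictionary" TODO above (our own suggestion, not
# existing repository code): extending the checkpoint dict with the optimizer state keeps the SGD
# momentum buffers across a resume. The 'optimizer' key name and helper names are hypothetical.
import torch

def save_checkpoint(path, iteration, model, optimizer):
    torch.save({
        'iteration': iteration,
        'state_dict': model.state_dict(),
        'optimizer': optimizer.state_dict(),   # keeps SGD momentum buffers
    }, path)

def load_checkpoint(path, model, optimizer):
    checkpoint = torch.load(path)
    model.load_state_dict(checkpoint['state_dict'])
    if 'optimizer' in checkpoint:              # older checkpoints may lack this key
        optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['iteration']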
class MPSCAgent(AutonomousAgent):

    def setup(self, path_to_config_file):
        yaml_conf, checkpoint_number = checkpoint_parse_configuration_file(path_to_config_file)

        # Take the checkpoint name and load it
        checkpoint = torch.load(os.path.join('/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                                             '_logs', yaml_conf.split('/')[-2],
                                             yaml_conf.split('/')[-1].split('.')[-2],
                                             'checkpoints', str(checkpoint_number) + '.pth'))
        # merge the specific agent config with the global config _g_conf
        merge_with_yaml(os.path.join('/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]), yaml_conf))

        self.checkpoint = checkpoint  # We save the checkpoint for some interesting future use.
        # TODO: retrain the model with MPSC
        self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
        self.first_iter = True
        logging.info("Setup Model")
        # Load the model and prepare it for evaluation
        self._model.load_state_dict(checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()
        self.latest_image = None
        self.latest_image_tensor = None
        # We add more time to the curve commands
        self._expand_command_front = 5
        self._expand_command_back = 3
        # check map waypoint format => carla_data_provider & http://carla.org/2018/11/16/release-0.9.1/
        # e.g. from map.get_waypoint:
        #   Waypoint(Transform(Location(x=338.763, y=226.453, z=0), Rotation(pitch=360, yaw=270.035, roll=0)))
        self.track = Track.ALL_SENSORS_HDMAP_WAYPOINTS  # specify available track info, see autonomous_agent.py

    def sensors(self):
        # Currently give the full suite of available sensors.
        # Check the config/installation of the sensors => https://carla.readthedocs.io/en/latest/cameras_and_sensors/
        sensors = [{'type': 'sensor.camera.rgb', 'x': 0.7, 'y': 0.0, 'z': 1.60,
                    'roll': 0.0, 'pitch': 0.0, 'yaw': 0.0,
                    'width': 800, 'height': 600, 'fov': 100, 'id': 'Center'},
                   {'type': 'sensor.camera.rgb', 'x': 0.7, 'y': -0.4, 'z': 1.60,
                    'roll': 0.0, 'pitch': 0.0, 'yaw': -45.0,
                    'width': 800, 'height': 600, 'fov': 100, 'id': 'Left'},
                   {'type': 'sensor.camera.rgb', 'x': 0.7, 'y': 0.4, 'z': 1.60,
                    'roll': 0.0, 'pitch': 0.0, 'yaw': 45.0,
                    'width': 800, 'height': 600, 'fov': 100, 'id': 'Right'},
                   {'type': 'sensor.lidar.ray_cast', 'x': 0.7, 'y': -0.4, 'z': 1.60,
                    'roll': 0.0, 'pitch': 0.0, 'yaw': -45.0, 'id': 'LIDAR'},
                   {'type': 'sensor.other.gnss', 'x': 0.7, 'y': -0.4, 'z': 1.60, 'id': 'GPS'},
                   {'type': 'sensor.can_bus', 'reading_frequency': 25, 'id': 'can_bus'},
                   {'type': 'sensor.hd_map', 'reading_frequency': 1, 'id': 'hdmap'},
                   ]
        return sensors

    def run_step(self, input_data, timestamp):
        # The core method.
        # TODO
        # 1. request current localization
        # input_data is obtained from the sensors
        #    => autonomous_agent.py, def __call__(self)
        for key, value in input_data.items():
            print("input_data ", key, value)
        # Example dump, ======[Agent] Wallclock_time = 2019-07-08 14:26:54.522155 / Sim_time = 1.4500000216066837
        # Each input_data entry is a (frame_number, payload) tuple:
        #   'GPS'     -> (3755, array([49.00202793, 8.00463308, 1.58916414]))
        #   'can_bus' -> (43, {'speed': -1.6444947256841175e-06, 'transform': <carla.Transform>,
        #                      'moi': 1.0, 'mass': 1850.0, 'max_rpm': 6000.0, 'clutch_strength': 10.0,
        #                      'drag_coefficient': 0.3, 'use_gear_autobox': True,
        #                      'center_of_mass': {...}, 'dimensions': {...},
        #                      'torque_curve': [...], 'steering_curve': [...], 'wheels': [...],
        #                      'linear_velocity' / 'lateral_speed' / 'linear_acceleration':
        #                          3x3 arrays of carla.Vector3D objects, ...})
        #   'rgb'     -> (3753, <BGRA uint8 camera image>)
        # Direction: RoadOption.LANEFOLLOW
        # ego_trans: Transform(Location(x=338.763, y=226.453, z=-0.0109183),
        #                      Rotation(pitch=0.000136604, yaw=-89.9654, roll=-0.000274658))
        # 1.9995784804148584 / 0.0
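        # Access pattern implied by the dump above and used in the code below:
        #   gps_frame, gps_fix = input_data['GPS']         # gps_fix ~ [lat, lon, z]
        #   can_frame, can_info = input_data['can_bus']    # dict with 'speed', 'transform', ...
        #   rgb_frame, rgb_image = input_data['rgb']       # BGRA uint8 camera image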
        localization = input_data['GPS']
        directions = self._get_current_direction(input_data['GPS'][1])
        logging.debug("Directions {}".format(directions))

        # 2. get the recommended action from the NN controller (copied from CoILBaseline)
        # Take the forward speed and normalize it so it goes from 0 to 1
        norm_speed = input_data['can_bus'][1]['speed'] / g_conf.SPEED_FACTOR
        norm_speed = torch.cuda.FloatTensor([norm_speed]).unsqueeze(0)
        directions_tensor = torch.cuda.LongTensor([directions])
        # End-to-end part: feed in images from the rgb sensor, then parse the network output as the controller.
        # Compute the forward pass processing the sensors got from CARLA.
        model_outputs = self._model.forward_branch(self._process_sensors(input_data['rgb'][1]),
                                                   norm_speed, directions_tensor)
        steer, throttle, brake = self._process_model_outputs(model_outputs[0])

        # 3. use the inner loop to simulate/approximate the vehicle model
        # save the NN output as vehicle control
        sim_control = carla.VehicleControl()
        sim_control.steer = float(steer)
        sim_control.throttle = float(throttle)
        sim_control.brake = float(brake)
        logging.debug("inner loop for sim_control %s", sim_control)

        # TODO (pseudocode from here on): copy a "parallel world" and create a "virtual agent"
        # that has the same state as the ego_vehicle.
        sim_world = self.world  # TODO: check how to copy the world; road info is necessary, the rest optional
        sim_ego = sim_world.create_ego_vehicle(current_ego_states)

        sim_world.agent_instance = getattr(sim_world.module_agent, sim_world.module_agent.__name__)(args.config)
        correct_sensors, error_message = sim_world.valid_sensors_configuration(sim_world.sim_agent, sim_world.track)

        # pass the sim_control to the virtual agent and run T timesteps
        sim_ego.apply_control(sim_control)

        # use the current model to predict the following state-action series
        MPSC_controls = []  # TODO: check where it should be initialized
        for i in range(T):
            sim_ego.run_step()  # TODO: def run_step, update for sim_ego
            sim_ego.update()
            # 4. use MPSC to check safety at each future timestep
            safe = MPSC.check_safety(sim_ego.state, safety_boundary)

            if not safe:
                # if not safe, obtain the MPSC control output
                logging.debug("use MPSC controller")
                control = MPSC_control  # TODO: MPSC_control comes from the MPSC controller (not defined yet)
                MPSC_controls.append(MPSC_control)  # collect all "safe" outputs
                # 7. execute the MPSC control and add it to the new dataset
                break
            else:
                if i < T - 1:
                    continue
                else:
                    # final step: if safe within all T timesteps, proceed to use the NN control output
                    logging.debug("use NN controller")
                    control = sim_control

        # 8. retrain the network and/or do policy aggregation
        if len(MPSC_controls):
            self.model.train(self.model, MPSC_controls)

        logging.debug("Control output %s", control)
        # There is the possibility to replace some of the predictions with oracle predictions.
        self.first_iter = False
        return control
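# Illustrative sketch (not repository code): MPSC.check_safety above is still pseudocode, so this
# is one minimal form such an envelope check could take. SafetyBoundary, its bounds, and the
# lateral_offset / speed attributes of the state are our own assumptions for illustration only.
from dataclasses import dataclass

@dataclass
class SafetyBoundary:
    """Illustrative safety envelope: lateral offset from the reference path and speed limits."""
    max_lateral_offset: float = 1.0   # metres
    max_speed: float = 15.0           # m/s

def check_safety(state, boundary):
    """Return True if the predicted state stays inside the envelope."""
    return (abs(state.lateral_offset) <= boundary.max_lateral_offset
            and 0.0 <= state.speed <= boundary.max_speed)

# Inside the inner loop, the call would then look like:
#   safe = check_safety(sim_ego.state, SafetyBoundary())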