def test_check_status_not_existent(self):
    # Check that the status can be queried for a nonexistent experiment.
    g_conf.immutable(False)
    status = monitorer.get_status('monitor_test', 'experiment_25.yaml',
                                  g_conf.PROCESS_NAME)
    self.assertEqual(status[0], "Does Not Exist")
def test_check_status_to_run(self):
    # Check an experiment that exists in the config files but has not been
    # started.
    g_conf.immutable(False)
    g_conf.NAME = 'experiment_to_run'
    status = monitorer.get_status('monitor_test', 'experiment_to_run.yaml',
                                  g_conf.PROCESS_NAME)
    self.assertEqual(status[0], "Not Started")
def print_folder_process_names(exp_batch):
    experiments_list = os.listdir(os.path.join('configs', exp_batch))
    sort_nicely(experiments_list)

    for experiment in experiments_list:
        if '.yaml' in experiment:
            g_conf.immutable(False)
            merge_with_yaml(os.path.join('configs', exp_batch, experiment))
            print(experiment.split('.')[-2] + ': '
                  + g_conf.EXPERIMENT_GENERATED_NAME)
def test_basic_data(self):
    # The Town01/Town02 data; try to load it.
    g_conf.immutable(False)
    g_conf.EXPERIMENT_NAME = 'coil_icra'
    create_log_folder('sample')
    create_exp_path('sample', 'coil_icra')
    merge_with_yaml('configs/sample/coil_icra.yaml')
    set_type_of_process('train')

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], 'CoILTrain')
    dataset = CoILDataset(full_dataset,
                          transform=None,
                          preload_name=str(g_conf.NUMBER_OF_HOURS)
                          + 'hours_' + g_conf.TRAIN_DATASET_NAME)
def test_town3_data(self):
    # The Town03 data uses different names and has no pedestrian or
    # vehicle-stop indications.
    g_conf.immutable(False)
    g_conf.EXPERIMENT_NAME = 'resnet34imnet'
    create_log_folder('town03')
    create_exp_path('town03', 'resnet34imnet')
    merge_with_yaml('configs/town03/resnet34imnet.yaml')
    set_type_of_process('train')

    full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"],
                                'CoILTrainTown03')
    dataset = CoILDataset(full_dataset,
                          transform=None,
                          preload_name=str(g_conf.NUMBER_OF_HOURS)
                          + 'hours_' + g_conf.TRAIN_DATASET_NAME)
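# The preload names assembled in the two tests above follow the pattern
# '<NUMBER_OF_HOURS>hours_<TRAIN_DATASET_NAME>'. A minimal sketch of that
# composition with placeholder values (the concrete numbers are assumptions,
# not taken from the configs):
number_of_hours = 1
train_dataset_name = 'CoILTrain'
preload_name = str(number_of_hours) + 'hours_' + train_dataset_name
assert preload_name == '1hours_CoILTrain'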
def test_check_status_error(self):
    g_conf.immutable(False)
    # TODO: The error case? How do we nicely merge it with the other parts?
    g_conf.NAME = 'experiment_running_error'
    # TODO: this merge is weird.
    merge_with_yaml('configs/monitor_test/experiment_running_error.yaml')
    # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
    set_type_of_process('train')

    coil_logger.add_message(
        'Loading',
        {"Keys_Division": [1, 123, 1, 1, 2, 12, 3, 12, 31, 2, 1, 1]})
    coil_logger.add_message('Loading', {
        "Models_loaded": ' VUALA ',
        "Checkpoint": "988765"
    })
    for i in range(0, 10):
        coil_logger.add_message('Iterating', {
            "Iteration": i,
            "ReadKeys": [1, 123, 5, 1, 34, 1, 23]
        })
        coil_logger.add_message('Iterating', {
            "Iteration": i,
            "Output": ["output"]
        })
    coil_logger.add_message('Error', {
        "Iteration": 10,
        "Message": " Some data integrity problems ! "
    })
    # TODO: Check how the alias will work.
    status = monitorer.get_status('monitor_test',
                                  'experiment_running_error.yaml',
                                  g_conf.PROCESS_NAME)
    self.assertEqual(status[0], "Error")
    print(status[1])
def test_check_status_finished(self):
    g_conf.immutable(False)
    g_conf.NAME = 'experiment_finished'
    # TODO: this merge is weird.
    merge_with_yaml('configs/monitor_test/experiment_finished.yaml')
    # We set the number of iterations low so the experiment can finish.
    g_conf.NUMBER_ITERATIONS = 20
    # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
    set_type_of_process('train')

    coil_logger.add_message(
        'Loading',
        {"Keys_Division": [1, 123, 1, 1, 2, 12, 3, 12, 31, 2, 1, 1]})
    coil_logger.add_message('Loading', {
        "Models_loaded": ' VUALA ',
        "Checkpoint": "988765"
    })
    for i in range(0, 21):
        coil_logger.add_message('Iterating', {
            "Iteration": i,
            "ReadKeys": [1, 123, 5, 1, 34, 1, 23]
        }, i)
        coil_logger.add_message('Iterating', {
            "Iteration": i,
            "Output": ["output"]
        }, i)
    # TODO: Check how the alias will work.
    status = monitorer.get_status('monitor_test',
                                  'experiment_finished.yaml',
                                  g_conf.PROCESS_NAME)
    self.assertEqual(status[0], "Finished")
def test_check_status_running_loading(self):
    g_conf.immutable(False)
    g_conf.NAME = 'experiment_running_loading'
    # TODO: this merge is weird.
    merge_with_yaml('configs/monitor_test/experiment_running_loading.yaml')
    # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
    set_type_of_process('train')

    coil_logger.add_message(
        'Loading',
        {"Keys_Division": [1, 123, 1, 1, 2, 12, 3, 12, 31, 2, 1, 1]})
    coil_logger.add_message('Loading', {
        "Models_loaded": ' VUALA ',
        "Checkpoint": "988765"
    })
    # TODO: Check how the alias will work.
    status = monitorer.get_status('monitor_test',
                                  'experiment_running_loading.yaml',
                                  g_conf.PROCESS_NAME)
    self.assertEqual(status[0], "Loading")
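# Taken together, the monitorer tests above exercise the status vocabulary
# that this file asserts on or logs. The tuple below is a convenience summary
# derived from this file's assertions and log messages, not an API exposed by
# monitorer itself:
EXPECTED_STATUSES = ('Does Not Exist', 'Not Started', 'Loading',
                     'Iterating', 'Finished', 'Error')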
def setup(self, path_to_config_file):
    yaml_conf, checkpoint_number, agent_name, encoder_params = \
        checkpoint_parse_configuration_file(path_to_config_file)

    # Take the checkpoint name and load it.
    if encoder_params is not None:
        self.checkpoint = torch.load(
            os.path.join(
                '/',
                os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                '_logs',
                yaml_conf.split('/')[-2],
                yaml_conf.split('/')[-1].split('.')[-2] + '_'
                + str(encoder_params['encoder_checkpoint']),
                'checkpoints',
                str(checkpoint_number) + '.pth'))

        # Once ENCODER_MODEL_CONFIGURATION is defined, we use the
        # pre-trained encoder model to extract the bottleneck Z and drive
        # the end-to-end agent.
        self.encoder_checkpoint = torch.load(
            os.path.join(
                '/',
                os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                '_logs',
                encoder_params['encoder_folder'],
                encoder_params['encoder_exp'],
                'checkpoints',
                str(encoder_params['encoder_checkpoint']) + '.pth'))
        self.encoder_model = CoILModel(g_conf.ENCODER_MODEL_TYPE,
                                       g_conf.ENCODER_MODEL_CONFIGURATION)
        self.encoder_model.load_state_dict(
            self.encoder_checkpoint['state_dict'])
        self.encoder_model.cuda()
        self.encoder_model.eval()
    else:
        self.checkpoint = torch.load(
            os.path.join(
                '/',
                os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
                '_logs',
                yaml_conf.split('/')[-2],
                yaml_conf.split('/')[-1].split('.')[-2],
                'checkpoints',
                str(checkpoint_number) + '.pth'))

    # Do the merge here.
    # TODO: THE MERGE IS REQUIRED DEPENDING ON THE SITUATION
    g_conf.immutable(False)
    merge_with_yaml(
        os.path.join(
            '/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]),
            yaml_conf), encoder_params)

    self._model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION,
                            g_conf.ENCODER_MODEL_CONFIGURATION)
    self.first_iter = True
    logging.info("Setup Model")
    # Load the model and prepare it for evaluation.
    self._model.load_state_dict(self.checkpoint['state_dict'])
    self._model.cuda()
    self._model.eval()
    self.latest_image = None
    self.latest_image_tensor = None
    # We add more time to the curve commands.
    self._expand_command_front = 5
    self._expand_command_back = 3
    # TODO: Merge with Felipe's code
    self._msn = None
    self._lat_ref = 0
    self._lon_ref = 0
    # Check the agent name.
    self._name = agent_name
    self.count = 0
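# A note on the path construction in setup() above: the idiom
# os.path.join('/', os.path.join(*os.path.realpath(__file__).split('/')[:-2]))
# simply resolves to the directory two levels above this file. A
# behaviour-equivalent, clearer sketch (our suggestion, not what the codebase
# uses):
import os

repo_root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))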
def plot_folder_summaries(exp_batch,
                          train,
                          validation_datasets,
                          drive_environments,
                          verbose=False):
    """
    Main plotting function for the folder mode.

    Args:
        exp_batch: The experiment batch (folder) being plotted on the screen.
        train: If the train process is being printed.
        validation_datasets: The validation datasets being computed.
        drive_environments: The driving environments / benchmarks.
        verbose: If detailed summaries should be printed.

    Returns:
        None
    """
    os.system('clear')
    process_names = []
    if train:
        process_names.append('train')

    for val in validation_datasets:
        process_names.append('validation' + '_' + val)

    # We save the drive files to be used later.
    drive_files = {}
    for drive in drive_environments:
        drive_files.update({
            ('drive' + '_' + drive.split('/')[-1].split('.')[0]): drive
        })
        process_names.append('drive' + '_'
                             + drive.split('/')[-1].split('.')[0])

    experiments_list = os.listdir(os.path.join('configs', exp_batch))
    experiments_list = [
        experiment.split('.')[-2] for experiment in experiments_list
    ]

    names_list = get_names(exp_batch)
    sorted_keys = sorted(
        range(len(names_list)),
        key=lambda k: names_list[experiments_list[k] + '.yaml'])

    for key in sorted_keys:
        experiment = experiments_list[key]
        generated_name = names_list[experiment + '.yaml']

        if experiment == '':
            raise ValueError("Empty Experiment on List")

        g_conf.immutable(False)
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     experiment + '.yaml'))

        print(BOLD + experiment + ' : ' + generated_name + END)

        for process in process_names:
            try:
                output = get_status(exp_batch, experiment, process)
            except:
                # TODO: bad design, but the printing should not stop for any
                # error on reading.
                import traceback
                traceback.print_exc()
                continue  # Skip this process; `output` was never assigned.

            status = output[0]
            summary = output[1]
            print('    ', process)

            if status == 'Not Started':
                print('        STATUS: ', BOLD + status + END)
            elif status == 'Loading':
                print('        STATUS: ', YELLOW + status + END, ' - ',
                      YELLOW + summary + END)
            elif status == 'Iterating':
                print('        STATUS: ', YELLOW + status + END)
            elif status == 'Finished':
                print('        STATUS: ', GREEN + status + END)
            elif status == 'Error':
                print('        STATUS: ', RED + status + END, ' - ',
                      RED + summary + END)

            if status == 'Iterating':
                if 'train' == process:
                    print_train_summary(summary[status])
                if 'validation' in process:
                    if summary[1] != '':
                        # If it has no summary we don't plot.
                        print_validation_summary(
                            summary[0][status],
                            summary[1][status]['Summary'], verbose)
                    else:
                        print_validation_summary(summary[0][status], '',
                                                 verbose)
                if 'drive' in process:
                    if 'Agent' not in summary[status]:
                        continue
                    checkpoint = summary[status]['Checkpoint']
                    # This contains the results from completed iterations;
                    # we read the json file directly.
                    agent_checkpoint_name = (str(exp_batch) + '_'
                                             + str(experiment) + '_'
                                             + str(checkpoint))
                    print_drive_summary(drive_files[process],
                                        agent_checkpoint_name, checkpoint)
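# A hedged usage sketch for the folder plotting mode documented above. The
# batch name and the dataset/benchmark entries below are placeholders and
# must match real folders under 'configs/' and real benchmark files:
if __name__ == '__main__':
    plot_folder_summaries(exp_batch='sample',
                          train=True,
                          validation_datasets=['SmallTest'],
                          drive_environments=['drive_benchmarks/Town01.yaml'],
                          verbose=True)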
def setup(self, path_to_config_file):
    self._agent = None
    self.route_assigned = False
    self.count = 0

    exp_dir = os.path.join(
        '/', os.path.join(*path_to_config_file.split('/')[:-1]))

    yaml_conf, checkpoint_number, agent_name, encoder_params = \
        checkpoint_parse_configuration_file(path_to_config_file)

    if encoder_params == "None":
        encoder_params = None

    g_conf.immutable(False)
    merge_with_yaml(
        os.path.join('/',
                     os.path.join(*path_to_config_file.split('/')[:-4]),
                     yaml_conf), encoder_params)

    if g_conf.MODEL_TYPE in ['one-step-affordances']:
        # One-step training: no need to retrain the FC layers, we just take
        # the output of the encoder model as the prediction.
        self._model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                   g_conf.ENCODER_MODEL_CONFIGURATION)
        self.checkpoint = torch.load(
            os.path.join(exp_dir, 'checkpoints',
                         str(checkpoint_number) + '.pth'))
        print("Affordances Model ",
              str(checkpoint_number) + '.pth', "loaded from ",
              os.path.join(exp_dir, 'checkpoints'))
        self._model.load_state_dict(self.checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()

    elif g_conf.MODEL_TYPE in ['separate-affordances']:
        if encoder_params is not None:
            self.encoder_model = EncoderModel(
                g_conf.ENCODER_MODEL_TYPE,
                g_conf.ENCODER_MODEL_CONFIGURATION)
            self.encoder_model.cuda()
            # Here we load the pre-trained encoder (not fine-tuned).
            if g_conf.FREEZE_ENCODER:
                encoder_checkpoint = torch.load(
                    os.path.join(
                        os.path.join(
                            '/',
                            os.path.join(
                                *path_to_config_file.split('/')[:-4])),
                        '_logs', encoder_params['encoder_folder'],
                        encoder_params['encoder_exp'], 'checkpoints',
                        str(encoder_params['encoder_checkpoint']) + '.pth'))
                print(
                    "Encoder model ",
                    str(encoder_params['encoder_checkpoint']),
                    "loaded from ",
                    os.path.join('_logs', encoder_params['encoder_folder'],
                                 encoder_params['encoder_exp'],
                                 'checkpoints'))
                self.encoder_model.load_state_dict(
                    encoder_checkpoint['state_dict'])
                self.encoder_model.eval()
                for param_ in self.encoder_model.parameters():
                    param_.requires_grad = False
            else:
                encoder_checkpoint = torch.load(
                    os.path.join(exp_dir, 'checkpoints',
                                 str(checkpoint_number) + '_encoder.pth'))
                print("FINE-TUNED encoder model ",
                      str(checkpoint_number) + '_encoder.pth',
                      "loaded from ", os.path.join(exp_dir, 'checkpoints'))
                self.encoder_model.load_state_dict(
                    encoder_checkpoint['state_dict'])
                self.encoder_model.eval()
                for param_ in self.encoder_model.parameters():
                    param_.requires_grad = False
        else:
            raise RuntimeError(
                'encoder_params cannot be None in '
                'MODEL_TYPE --> separate-affordances')

        self._model = CoILModel(g_conf.MODEL_TYPE,
                                g_conf.MODEL_CONFIGURATION,
                                g_conf.ENCODER_MODEL_CONFIGURATION)
        self.checkpoint = torch.load(
            os.path.join(exp_dir, 'checkpoints',
                         str(checkpoint_number) + '.pth'))
        print("Affordances Model ",
              str(checkpoint_number) + '.pth', "loaded from ",
              os.path.join(exp_dir, 'checkpoints'))
        self._model.load_state_dict(self.checkpoint['state_dict'])
        self._model.cuda()
        self._model.eval()
def execute(gpu,
            exp_batch,
            exp_alias,
            json_file_path,
            suppress_output,
            encoder_params=None,
            plot_attentions=False):
    try:
        # We set the visible cuda devices.
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        if json_file_path is not None:
            json_file_name = json_file_path.split('/')[-1].split('.')[-2]
        else:
            raise RuntimeError(
                "You need to define the validation json file path")

        # At this point the log file with the correct naming is created.
        merge_with_yaml(
            os.path.join('configs', exp_batch, exp_alias + '.yaml'),
            encoder_params)
        if plot_attentions:
            set_type_of_process('validation',
                                json_file_name + '_plotAttention')
        else:
            set_type_of_process('validation', json_file_name)

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        if suppress_output:
            sys.stdout = open(os.path.join(
                '_output_logs', exp_alias + '_' + g_conf.PROCESS_NAME + '_'
                + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)
            sys.stderr = open(os.path.join(
                '_output_logs', exp_alias + '_err_' + g_conf.PROCESS_NAME
                + '_' + str(os.getpid()) + ".out"),
                              "a",
                              buffering=1)

        # We create the file for saving validation results.
        summary_file = os.path.join('_logs', exp_batch,
                                    g_conf.EXPERIMENT_NAME,
                                    g_conf.PROCESS_NAME + '_csv',
                                    'valid_summary_1camera.csv')
        g_conf.immutable(False)
        g_conf.DATA_USED = 'central'
        g_conf.immutable(True)
        if not os.path.exists(summary_file):
            csv_outfile = open(summary_file, 'w')
            csv_outfile.write(
                "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s\n"
                % ('step', 'accumulated_pedestrian_TP',
                   'accumulated_pedestrian_FP', 'accumulated_pedestrian_FN',
                   'accumulated_pedestrian_TN',
                   'accumulated_vehicle_stop_TP',
                   'accumulated_vehicle_stop_FP',
                   'accumulated_vehicle_stop_FN',
                   'accumulated_vehicle_stop_TN', 'accumulated_red_tl_TP',
                   'accumulated_red_tl_FP', 'accumulated_red_tl_FN',
                   'accumulated_red_tl_TN', 'MAE_relative_angle'))
            csv_outfile.close()

        latest = get_latest_evaluated_checkpoint_2(summary_file)

        # Define the dataset. This structure has __getitem__ redefined so
        # that the HDF5 file positions can be accessed from the root
        # directory as in a vector.
        # full_dataset = os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
        augmenter = Augmenter(None)
        # Definition of the dataset to be used. The preload name is just the
        # validation data name.
        dataset = CoILDataset(transform=augmenter,
                              preload_name=g_conf.PROCESS_NAME + '_'
                              + g_conf.DATA_USED,
                              process_type='validation',
                              vd_json_file_path=json_file_path)
        print("Loaded Validation dataset")

        # The sampler is responsible for managing the keys: it divides all
        # keys depending on the measurements and produces a set of keys for
        # each batch. The data loader is the multi-threaded module from
        # pytorch that releases a number of workers to get all the data.
        data_loader = torch.utils.data.DataLoader(
            dataset,
            batch_size=g_conf.BATCH_SIZE,
            shuffle=False,
            num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
            pin_memory=True)

        if g_conf.MODEL_TYPE in ['one-step-affordances']:
            # One-step training: no need to retrain the FC layers, we just
            # take the output of the encoder model as the prediction.
            model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                 g_conf.ENCODER_MODEL_CONFIGURATION)
            model.cuda()
            # print(model)
        elif g_conf.MODEL_TYPE in ['separate-affordances']:
            model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION,
                              g_conf.ENCODER_MODEL_CONFIGURATION)
            model.cuda()
            # print(model)

            encoder_model = EncoderModel(g_conf.ENCODER_MODEL_TYPE,
                                         g_conf.ENCODER_MODEL_CONFIGURATION)
            encoder_model.cuda()
            encoder_model.eval()

            # Here we load the pre-trained encoder (not fine-tuned).
            if g_conf.FREEZE_ENCODER:
                if encoder_params is not None:
                    encoder_checkpoint = torch.load(
                        os.path.join(
                            '_logs', encoder_params['encoder_folder'],
                            encoder_params['encoder_exp'], 'checkpoints',
                            str(encoder_params['encoder_checkpoint'])
                            + '.pth'))
                    print(
                        "Encoder model ",
                        str(encoder_params['encoder_checkpoint']),
                        "loaded from ",
                        os.path.join('_logs',
                                     encoder_params['encoder_folder'],
                                     encoder_params['encoder_exp'],
                                     'checkpoints'))
                    encoder_model.load_state_dict(
                        encoder_checkpoint['state_dict'])
                    encoder_model.eval()
                    for param_ in encoder_model.parameters():
                        param_.requires_grad = False

        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
            latest = get_next_checkpoint_2(g_conf.TEST_SCHEDULE,
                                           summary_file)

            if os.path.exists(
                    os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME,
                                 'checkpoints', str(latest) + '.pth')):
                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, g_conf.EXPERIMENT_NAME,
                                 'checkpoints', str(latest) + '.pth'))
                checkpoint_iteration = checkpoint['iteration']

                model.load_state_dict(checkpoint['state_dict'])
                print("Validation checkpoint ", checkpoint_iteration)
                model.eval()
                for param_ in model.parameters():
                    param_.requires_grad = False

                # Here we load the fine-tuned encoder.
                if not g_conf.FREEZE_ENCODER and g_conf.MODEL_TYPE not in [
                        'one-step-affordances'
                ]:
                    encoder_checkpoint = torch.load(
                        os.path.join('_logs', exp_batch,
                                     g_conf.EXPERIMENT_NAME, 'checkpoints',
                                     str(latest) + '_encoder.pth'))
                    print(
                        "FINE-TUNED encoder model ",
                        str(latest) + '_encoder.pth', "loaded from ",
                        os.path.join('_logs', exp_batch,
                                     g_conf.EXPERIMENT_NAME, 'checkpoints'))
                    encoder_model.load_state_dict(
                        encoder_checkpoint['state_dict'])
                    encoder_model.eval()
                    for param_ in encoder_model.parameters():
                        param_.requires_grad = False

                accumulated_mae_ra = 0
                accumulated_pedestrian_TP = 0
                accumulated_pedestrian_TN = 0
                accumulated_pedestrian_FN = 0
                accumulated_pedestrian_FP = 0

                accumulated_red_tl_TP = 0
                accumulated_red_tl_TN = 0
                accumulated_red_tl_FP = 0
                accumulated_red_tl_FN = 0

                accumulated_vehicle_stop_TP = 0
                accumulated_vehicle_stop_TN = 0
                accumulated_vehicle_stop_FP = 0
                accumulated_vehicle_stop_FN = 0

                iteration_on_checkpoint = 0

                for data in data_loader:
                    if g_conf.MODEL_TYPE in ['one-step-affordances']:
                        c_output, r_output, layers = model.forward_outputs(
                            torch.squeeze(data['rgb'].cuda()),
                            dataset.extract_inputs(data).cuda(),
                            dataset.extract_commands(data).cuda())
                    elif g_conf.MODEL_TYPE in ['separate-affordances']:
                        if g_conf.ENCODER_MODEL_TYPE in [
                                'action_prediction', 'stdim', 'ETEDIM',
                                'FIMBC', 'one-step-affordances'
                        ]:
                            e, layers = encoder_model.forward_encoder(
                                torch.squeeze(data['rgb'].cuda()),
                                dataset.extract_inputs(data).cuda(),
                                torch.squeeze(
                                    dataset.extract_commands(data).cuda()))
                            c_output, r_output = model.forward_test(e)
                        elif g_conf.ENCODER_MODEL_TYPE in [
                                'ETE', 'ETE_inverse_model', 'forward',
                                'ETE_stdim'
                        ]:
                            e, layers = encoder_model.forward_encoder(
                                torch.squeeze(data['rgb'].cuda()),
                                dataset.extract_inputs(data).cuda(),
                                torch.squeeze(
                                    dataset.extract_commands(data).cuda()))
                            c_output, r_output = model.forward_test(e)

                    if plot_attentions:
                        attentions_path = os.path.join(
                            '_logs', exp_batch, g_conf.EXPERIMENT_NAME,
                            g_conf.PROCESS_NAME + '_attentions_'
                            + str(latest))
                        write_attentions(torch.squeeze(data['rgb']), layers,
                                         iteration_on_checkpoint,
                                         attentions_path)

                    # Accuracy = (TP + TN) / (TP + TN + FP + FN)
                    # F1-score = 2*TP / (2*TP + FN + FP)
                    classification_gt = dataset.extract_affordances_targets(
                        data, 'classification')
                    regression_gt = dataset.extract_affordances_targets(
                        data, 'regression')

                    # Pedestrian affordance (column 0).
                    TP = 0
                    FN = 0
                    FP = 0
                    TN = 0
                    for i in range(classification_gt.shape[0]):
                        if classification_gt[i, 0] == (
                                c_output[0][i, 0] < c_output[0][i, 1]).type(
                                    torch.FloatTensor) == 1:
                            TP += 1
                        elif classification_gt[
                                i, 0] == 1 and classification_gt[i, 0] != (
                                    c_output[0][i, 0] <
                                    c_output[0][i, 1]).type(
                                        torch.FloatTensor):
                            FN += 1
                        elif classification_gt[
                                i, 0] == 0 and classification_gt[i, 0] != (
                                    c_output[0][i, 0] <
                                    c_output[0][i, 1]).type(
                                        torch.FloatTensor):
                            FP += 1
                        if classification_gt[i, 0] == (
                                c_output[0][i, 0] < c_output[0][i, 1]).type(
                                    torch.FloatTensor) == 0:
                            TN += 1
                    accumulated_pedestrian_TP += TP
                    accumulated_pedestrian_TN += TN
                    accumulated_pedestrian_FP += FP
                    accumulated_pedestrian_FN += FN

                    # Red traffic light affordance (column 1).
                    TP = 0
                    FN = 0
                    FP = 0
                    TN = 0
                    for i in range(classification_gt.shape[0]):
                        if classification_gt[i, 1] == (
                                c_output[1][i, 0] < c_output[1][i, 1]).type(
                                    torch.FloatTensor) == 1:
                            TP += 1
                        elif classification_gt[
                                i, 1] == 1 and classification_gt[i, 1] != (
                                    c_output[1][i, 0] <
                                    c_output[1][i, 1]).type(
                                        torch.FloatTensor):
                            FN += 1
                        elif classification_gt[
                                i, 1] == 0 and classification_gt[i, 1] != (
                                    c_output[1][i, 0] <
                                    c_output[1][i, 1]).type(
                                        torch.FloatTensor):
                            FP += 1
                        if classification_gt[i, 1] == (
                                c_output[1][i, 0] < c_output[1][i, 1]).type(
                                    torch.FloatTensor) == 0:
                            TN += 1
                    accumulated_red_tl_TP += TP
                    accumulated_red_tl_TN += TN
                    accumulated_red_tl_FP += FP
                    accumulated_red_tl_FN += FN

                    # Vehicle-stop affordance (column 2).
                    TP = 0
                    FN = 0
                    FP = 0
                    TN = 0
                    for i in range(classification_gt.shape[0]):
                        if classification_gt[i, 2] == (
                                c_output[2][i, 0] < c_output[2][i, 1]).type(
                                    torch.FloatTensor) == 1:
                            TP += 1
                        elif classification_gt[
                                i, 2] == 1 and classification_gt[i, 2] != (
                                    c_output[2][i, 0] <
                                    c_output[2][i, 1]).type(
                                        torch.FloatTensor):
                            FN += 1
                        elif classification_gt[
                                i, 2] == 0 and classification_gt[i, 2] != (
                                    c_output[2][i, 0] <
                                    c_output[2][i, 1]).type(
                                        torch.FloatTensor):
                            FP += 1
                        if classification_gt[i, 2] == (
                                c_output[2][i, 0] < c_output[2][i, 1]).type(
                                    torch.FloatTensor) == 0:
                            TN += 1
                    accumulated_vehicle_stop_TP += TP
                    accumulated_vehicle_stop_TN += TN
                    accumulated_vehicle_stop_FP += FP
                    accumulated_vehicle_stop_FN += FN

                    # If the data was normalized during training, we need to
                    # transform it back to its original units.
                    write_regular_output(checkpoint_iteration,
                                         torch.squeeze(r_output[0]),
                                         regression_gt[:, 0])
                    mae_ra = torch.abs(
                        regression_gt[:, 0]
                        - torch.squeeze(r_output[0]).type(
                            torch.FloatTensor)).numpy()
                    accumulated_mae_ra += np.sum(mae_ra)

                    if iteration_on_checkpoint % 100 == 0:
                        print(
                            "Validation iteration: %d [%d/%d] on Checkpoint %d"
                            % (iteration_on_checkpoint,
                               iteration_on_checkpoint, len(data_loader),
                               checkpoint_iteration))

                    iteration_on_checkpoint += 1

                # TODO: A better analysis is also needed here; divide into
                # curves and other things.
                MAE_relative_angle = accumulated_mae_ra / len(dataset)

                csv_outfile = open(summary_file, 'a')
                csv_outfile.write(
                    "%s, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f"
                    % (checkpoint_iteration, accumulated_pedestrian_TP,
                       accumulated_pedestrian_FP, accumulated_pedestrian_FN,
                       accumulated_pedestrian_TN,
                       accumulated_vehicle_stop_TP,
                       accumulated_vehicle_stop_FP,
                       accumulated_vehicle_stop_FN,
                       accumulated_vehicle_stop_TN, accumulated_red_tl_TP,
                       accumulated_red_tl_FP, accumulated_red_tl_FN,
                       accumulated_red_tl_TN, MAE_relative_angle))
                csv_outfile.write("\n")
                csv_outfile.close()

            else:
                print('The checkpoint you want to validate is not yet ready ',
                      str(latest))

        coil_logger.add_message('Finished', {})
        print('VALIDATION FINISHED !!')
        print('  Validation results saved in ==> ', summary_file)

    except KeyboardInterrupt:
        coil_logger.add_message('Error', {'Message': 'Killed By User'})
        # We erase the output that was left unfinished when the process
        # stopped.
        if latest is not None:
            coil_logger.erase_csv(latest)

    except RuntimeError as e:
        if latest is not None:
            coil_logger.erase_csv(latest)
        coil_logger.add_message('Error', {'Message': str(e)})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something Happened'})
        # We erase the output that was left unfinished when the process
        # stopped.
        if latest is not None:
            coil_logger.erase_csv(latest)
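# The validation loop above only accumulates raw TP/FP/FN/TN counts per
# affordance; the derived metrics named in its comments (Accuracy and F1) are
# left to the analysis side. A minimal, self-contained sketch of that
# computation (the helper name is ours, not part of the codebase):

def confusion_metrics(tp, fp, fn, tn):
    """Return (accuracy, f1) from accumulated confusion-matrix counts."""
    total = tp + tn + fp + fn
    accuracy = (tp + tn) / total if total > 0 else 0.0
    f1_denominator = 2 * tp + fn + fp
    f1 = 2 * tp / f1_denominator if f1_denominator > 0 else 0.0
    return accuracy, f1

# e.g. confusion_metrics(accumulated_pedestrian_TP, accumulated_pedestrian_FP,
#                        accumulated_pedestrian_FN, accumulated_pedestrian_TN)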
def test_gpu_poping(self):
    gpus_list = ['0', '1', '2', '3']
    path = 'configs'
    folder = 'test_exps'
    experiments_list = os.listdir(os.path.join(path, folder))
    experiments_list = [
        experiment.split('.')[-2] for experiment in experiments_list
    ]
    validation_datasets = ['SmallTest', 'OtherSmallTest']
    drive_environments = ['Town01', 'Town02']

    allocation_parameters = {
        'gpu_value': 3.5,
        'train_cost': 2,
        'validation_cost': 1.5,
        'drive_cost': 1.5
    }
    allocated_gpus = {
        gpu: allocation_parameters['gpu_value']
        for gpu in gpus_list
    }
    executing_processes = []

    free_gpus, resources_on_most_free_gpu, executing_processes = \
        get_gpu_resources(allocated_gpus, executing_processes,
                          allocation_parameters)

    print(" Free GPUs, resources on the most free")
    print(free_gpus, resources_on_most_free_gpu)
    print("Experiments list")
    print(experiments_list)
    tasks_queue = mount_experiment_heap(folder, experiments_list, True,
                                        validation_datasets,
                                        drive_environments)
    print("Tasks queue", tasks_queue)
    executing_processes = []

    while True:
        # Allocate all the GPUs.
        while resources_on_most_free_gpu > min([
                allocation_parameters['train_cost'],
                allocation_parameters['validation_cost'],
                allocation_parameters['drive_cost']
        ]) and tasks_queue != []:
            print("TASKS ", tasks_queue)
            popped_thing = heapq.heappop(tasks_queue)
            process_specs = popped_thing[2]  # To get the dict directly.
            print("process got: ", process_specs)
            print(free_gpus, resources_on_most_free_gpu)

            if process_specs['type'] == 'train' \
                    and resources_on_most_free_gpu >= \
                    allocation_parameters['train_cost']:
                free_gpus, resources_on_most_free_gpu, gpu_number = \
                    allocate_gpu_resources(
                        free_gpus, allocation_parameters['train_cost'])
                # execute_train(gpu_number, process_specs['folder'],
                #               process_specs['experiment'])
                process_specs.update({'gpu': gpu_number})
                executing_processes.append(process_specs)
            elif process_specs['type'] == 'validation' \
                    and resources_on_most_free_gpu >= \
                    allocation_parameters['validation_cost']:
                free_gpus, resources_on_most_free_gpu, gpu_number = \
                    allocate_gpu_resources(
                        free_gpus, allocation_parameters['validation_cost'])
                # execute_validation(gpu_number, process_specs['folder'],
                #                    process_specs['experiment'],
                #                    process_specs['dataset'])
                process_specs.update({'gpu': gpu_number})
                executing_processes.append(process_specs)
            elif process_specs['type'] == 'drive' \
                    and resources_on_most_free_gpu >= \
                    allocation_parameters['drive_cost']:
                free_gpus, resources_on_most_free_gpu, gpu_number = \
                    allocate_gpu_resources(
                        free_gpus, allocation_parameters['drive_cost'])
                # execute_drive(gpu_number, process_specs['folder'],
                #               process_specs['experiment'],
                #               process_specs['environment'])
                process_specs.update({'gpu': gpu_number})
                executing_processes.append(process_specs)

        random_process = random.choice(executing_processes)
        print('random process', random_process)
        fp_name = random_process['experiment']
        g_conf.immutable(False)
        merge_with_yaml('configs/test_exps/' + fp_name + '.yaml')
        # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
        if random_process['type'] == 'drive':
            set_type_of_process(random_process['type'],
                                random_process['environment'])
        elif random_process['type'] == 'validation':
            set_type_of_process(random_process['type'],
                                random_process['dataset'])
        else:
            set_type_of_process(random_process['type'])

        random_message = random.choice(['Finished', 'Error', 'Iterating'])
        print('set ', random_process['type'], ' from ',
              random_process['experiment'], ' to ', random_message)
        if random_message == 'Iterating':
            coil_logger.add_message(random_message, {'Iteration': 1}, 1)
            coil_logger.add_message(random_message, {'Iteration': 2}, 2)
        else:
            coil_logger.add_message(random_message, {})

        free_gpus, resources_on_most_free_gpu, executing_processes = \
            get_gpu_resources(allocated_gpus, executing_processes,
                              allocation_parameters)
        coil_logger.close()

        if len(executing_processes) == 0:
            break

        print("Free GPU After ", free_gpus, resources_on_most_free_gpu)
        print("We have ", len(executing_processes), " running.")
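# The scheduling loop above treats each GPU as a capacity budget: every GPU
# starts with 'gpu_value' (3.5) resources and each scheduled process
# subtracts its cost (train 2, validation/drive 1.5), so one train plus one
# validation exactly fill a GPU. A standalone toy version of that bookkeeping
# (our illustration, not the real allocate_gpu_resources):

def toy_allocate(gpus, cost):
    """Place a process of the given cost on the GPU with the most free room."""
    gpu = max(gpus, key=gpus.get)
    if gpus[gpu] < cost:
        raise RuntimeError('no GPU has %.1f free resources' % cost)
    gpus[gpu] -= cost
    return gpu

gpus = {'0': 3.5, '1': 3.5}
assert toy_allocate(gpus, 2.0) == '0'   # train lands on GPU 0, leaving 1.5
assert toy_allocate(gpus, 1.5) == '1'   # GPU 1 is now the freest (3.5)
assert gpus == {'0': 1.5, '1': 2.0}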
def plot_folder_summaries(exp_batch,
                          train,
                          validation_datasets,
                          drive_environments,
                          verbose=False):
    # TODO: if train is not running, the user should be warned.
    os.system('clear')
    process_names = []
    if train:
        process_names.append('train')

    for val in validation_datasets:
        process_names.append('validation' + '_' + val)

    for drive in drive_environments:
        process_names.append('drive' + '_' + drive)

    experiments_list = os.listdir(os.path.join('configs', exp_batch))
    experiments_list = [
        experiment.split('.')[-2] for experiment in experiments_list
    ]
    print(experiments_list)

    for experiment in experiments_list:
        if experiment == '':
            raise ValueError("Empty Experiment on List")

        g_conf.immutable(False)
        merge_with_yaml(os.path.join('configs', exp_batch,
                                     experiment + '.yaml'))

        print(BOLD + experiment + ' : '
              + g_conf.EXPERIMENT_GENERATED_NAME + END)

        for process in process_names:
            try:
                output = get_status(exp_batch, experiment, process)
            except:
                import traceback
                traceback.print_exc()
                continue  # Keep printing; `output` was never assigned.

            status = output[0]
            summary = output[1]
            print('    ', process)

            if status == 'Not Started':
                print('        STATUS: ', BOLD + status + END)
            elif status == 'Iterating' or status == 'Loading':
                print('        STATUS: ', YELLOW + status + END)
            elif status == 'Finished':
                print('        STATUS: ', GREEN + status + END)
            elif status == 'Error':
                print('        STATUS: ', RED + status + END)

            if status == 'Iterating':
                if 'train' in process:
                    print_train_summary(summary[status])
                if 'validation' in process:
                    if summary[1] != '':
                        # If it has no summary we don't plot.
                        print_validation_summary(
                            summary[0][status],
                            summary[1][status]['Summary'], verbose)
                    else:
                        print_validation_summary(summary[0][status], '',
                                                 verbose)
                if 'drive' in process:
                    if 'Agent' not in summary[status]:
                        continue
                    checkpoint = summary[status]['Checkpoint']
                    # This contains the results from completed iterations.
                    if g_conf.USE_ORACLE:
                        control_filename = 'control_output_auto.csv'
                    else:
                        control_filename = 'control_output.csv'

                    csv_file_path = os.path.join('_logs', exp_batch,
                                                 experiment,
                                                 process + '_csv',
                                                 control_filename)
                    path = (exp_batch + '_' + experiment + '_'
                            + str(checkpoint)
                            + '_' + process.split('_')[0]
                            + '_' + control_filename[:-4]
                            + '_' + process.split('_')[1]
                            + '_' + process.split('_')[2])

                    print_drive_summary(get_latest_path(path), csv_file_path,
                                        checkpoint, verbose)
def execute(gpu, exp_batch, exp_alias, dataset_name, validation_set=False):
    latest = None
    # We set the visible cuda devices.
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    g_conf.immutable(False)
    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    # If using the validation dataset, fix a very high number of hours.
    if validation_set:
        g_conf.NUMBER_OF_HOURS = 10000
    g_conf.immutable(True)

    # Define the dataset.
    full_dataset = [
        os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)
    ]
    augmenter = Augmenter(None)

    if validation_set:
        # Definition of the dataset to be used. The preload name is just the
        # validation data name.
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_names=[dataset_name])
    else:
        dataset = CoILDataset(full_dataset,
                              transform=augmenter,
                              preload_names=[
                                  str(g_conf.NUMBER_OF_HOURS) + 'hours_'
                                  + dataset_name
                              ],
                              train_dataset=True)

    # The data loader is the multi-threaded module from pytorch that
    # releases a number of workers to get all the data.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=g_conf.BATCH_SIZE,
        shuffle=False,
        num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
        pin_memory=True)

    # Define the model.
    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)
    """
    ###### Run a single driving benchmark specified by the checkpoint where validation is stale ######
    """
    if g_conf.FINISH_ON_VALIDATION_STALE is not None:
        while validation_stale_point(
                g_conf.FINISH_ON_VALIDATION_STALE) is None:
            time.sleep(0.1)

        validation_state_iteration = validation_stale_point(
            g_conf.FINISH_ON_VALIDATION_STALE)
        checkpoint = torch.load(
            os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                         str(validation_state_iteration) + '.pth'))
        print("Validation loaded ", validation_state_iteration)
    else:
        """
        ##### Main Loop: run a benchmark for each specified checkpoint on the "Test Configuration" #####
        """
        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
            # Get the correct checkpoint. We check it for one task name;
            # all of them are ready at the same time.
            if is_next_checkpoint_ready(
                    g_conf.TEST_SCHEDULE,
                    control_filename + '_' + task_list[0]):
                latest = get_next_checkpoint(
                    g_conf.TEST_SCHEDULE,
                    control_filename + '_' + task_list[0])
                checkpoint = torch.load(
                    os.path.join('_logs', exp_batch, exp_alias,
                                 'checkpoints', str(latest) + '.pth'))
                print("Validation loaded ", latest)
            else:
                time.sleep(0.1)

    # Load the model and prepare it for evaluation.
    model.load_state_dict(checkpoint['state_dict'])
    model.cuda()
    model.eval()

    first_iter = True
    for data in data_loader:
        # Compute the forward pass on a batch from the dataset and get the
        # intermediate representations of the squeeze network.
        if "seg" in g_conf.SENSORS.keys():
            perception_rep, speed_rep, intentions_rep = \
                model.get_intermediate_representations(
                    data,
                    dataset.extract_inputs(data).cuda(),
                    dataset.extract_intentions(data).cuda())
            perception_rep = perception_rep.data.cpu()
            speed_rep = speed_rep.data.cpu()
            intentions_rep = intentions_rep.data.cpu()
        if first_iter:
            perception_rep_all = perception_rep
            speed_rep_all = speed_rep
            intentions_rep_all = intentions_rep
        else:
            perception_rep_all = torch.cat(
                [perception_rep_all, perception_rep], 0)
            speed_rep_all = torch.cat([speed_rep_all, speed_rep], 0)
            intentions_rep_all = torch.cat(
                [intentions_rep_all, intentions_rep], 0)
        first_iter = False

    # Save the intermediate representations.
    perception_rep_all = perception_rep_all.tolist()
    speed_rep_all = speed_rep_all.tolist()
    intentions_rep_all = intentions_rep_all.tolist()
    np.save(
        os.path.join(
            '_preloads', exp_batch + '_' + exp_alias + '_' + dataset_name
            + '_representations'),
        [perception_rep_all, speed_rep_all, intentions_rep_all])
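# The representations saved above can be read back with numpy. Because a
# plain Python list was passed to np.save, allow_pickle may be required on
# load; the file name below is a placeholder following the pattern
# '<exp_batch>_<exp_alias>_<dataset_name>_representations.npy' (np.save
# appends the '.npy' suffix):
import numpy as np

perception, speed, intentions = np.load(
    '_preloads/sample_coil_icra_CoILVal1_representations.npy',
    allow_pickle=True)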
def test_simulate_save_and_read(self):
    g_conf.immutable(False)
    # TODO: this merge is weird.
    merge_with_yaml('test/test_checkpoint.yaml')
    # JUST A TRICK TO CONTAIN THE CURRENT LIMITATIONS
    set_type_of_process('validation')

    exp_batch = 'test'
    exp_alias = 'test_checkpoint'

    checkpoint = get_latest_saved_checkpoint()
    self.assertEqual(checkpoint, None)

    for iteration in range(0, int(g_conf.NUMBER_ITERATIONS / 2)):
        if is_ready_to_save(iteration):
            state = {
                'iteration': iteration,
            }
            torch.save(
                state,
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(iteration) + '.pth'))

    for validation in g_conf.TEST_SCHEDULE[
            0:int(len(g_conf.TEST_SCHEDULE) / 2)]:
        if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
            latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
            # Create the checkpoint file.
            coil_logger.write_on_csv(latest, [0.1, 0.2, 0.0])

    print(latest)
    self.assertEqual(latest, 800)

    for iteration in range(int(g_conf.NUMBER_ITERATIONS / 2),
                           g_conf.NUMBER_ITERATIONS):
        if is_ready_to_save(iteration):
            state = {
                'iteration': iteration,
            }
            # TODO: maybe already summarize the best model?
            torch.save(
                state,
                os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                             str(iteration) + '.pth'))

        if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
            latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
            coil_logger.write_on_csv(latest, [0.1, 0.2, 0.0])

    while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
        if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE):
            latest = get_next_checkpoint(g_conf.TEST_SCHEDULE)
            # Create the checkpoint file.
            coil_logger.write_on_csv(latest, [0.1, 0.2, 0.0])