def execute(gpu, exp_batch, exp_alias, dataset_name, validation_set=False):
    latest = None
    # We set the visible cuda devices
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu
    g_conf.immutable(False)
    # At this point the log file with the correct naming is created.
    merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))
    # If using the validation dataset, fix a very high number of hours
    if validation_set:
        g_conf.NUMBER_OF_HOURS = 10000
    g_conf.immutable(True)

    # Define the dataset.
    full_dataset = [os.path.join(os.environ["COIL_DATASET_PATH"], dataset_name)]
    augmenter = Augmenter(None)

    if validation_set:
        # Definition of the dataset to be used. The preload name is just the validation data name.
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_names=[dataset_name])
    else:
        dataset = CoILDataset(full_dataset, transform=augmenter,
                              preload_names=[str(g_conf.NUMBER_OF_HOURS) + 'hours_' + dataset_name],
                              train_dataset=True)

    # The data loader is the multi-threaded module from PyTorch that releases a number of
    # workers to get all the data.
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=g_conf.BATCH_SIZE,
                                              shuffle=False,
                                              num_workers=g_conf.NUMBER_OF_LOADING_WORKERS,
                                              pin_memory=True)

    # Define the model
    model = CoILModel(g_conf.MODEL_TYPE, g_conf.MODEL_CONFIGURATION)

    """
    ###### Load the single checkpoint at which validation went stale ######
    """
    if g_conf.FINISH_ON_VALIDATION_STALE is not None:
        while validation_stale_point(g_conf.FINISH_ON_VALIDATION_STALE) is None:
            time.sleep(0.1)

        validation_state_iteration = validation_stale_point(g_conf.FINISH_ON_VALIDATION_STALE)
        checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                             str(validation_state_iteration) + '.pth'))
        print("Validation loaded ", validation_state_iteration)
    else:
        """
        ##### Main loop, load each specified checkpoint on the "Test Configuration" #####
        """
        # NOTE: control_filename and task_list are not defined in this function; they are
        # expected to exist at module scope when this branch is taken.
        while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
            # Get the correct checkpoint.
            # We check it for some task name; all of them are ready at the same time.
            if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE,
                                        control_filename + '_' + task_list[0]):
                latest = get_next_checkpoint(g_conf.TEST_SCHEDULE,
                                             control_filename + '_' + task_list[0])
                checkpoint = torch.load(os.path.join('_logs', exp_batch, exp_alias, 'checkpoints',
                                                     str(latest) + '.pth'))
                print("Validation loaded ", latest)
            else:
                time.sleep(0.1)

    # Load the checkpoint into the model and set it up for evaluation
    model.load_state_dict(checkpoint['state_dict'])
    model.cuda()
    model.eval()

    first_iter = True
    for data in data_loader:
        # Compute the forward pass on a batch from the dataset and get the intermediate
        # representations of the squeeze network
        if "seg" in g_conf.SENSORS.keys():
            perception_rep, speed_rep, intentions_rep = \
                model.get_intermediate_representations(data,
                                                       dataset.extract_inputs(data).cuda(),
                                                       dataset.extract_intentions(data).cuda())
            perception_rep = perception_rep.data.cpu()
            speed_rep = speed_rep.data.cpu()
            intentions_rep = intentions_rep.data.cpu()
        if first_iter:
            perception_rep_all = perception_rep
            speed_rep_all = speed_rep
            intentions_rep_all = intentions_rep
        else:
            perception_rep_all = torch.cat([perception_rep_all, perception_rep], 0)
            speed_rep_all = torch.cat([speed_rep_all, speed_rep], 0)
            intentions_rep_all = torch.cat([intentions_rep_all, intentions_rep], 0)
        first_iter = False

    # Save the intermediate representations
    perception_rep_all = perception_rep_all.tolist()
    speed_rep_all = speed_rep_all.tolist()
    intentions_rep_all = intentions_rep_all.tolist()
    np.save(os.path.join('_preloads', exp_batch + '_' + exp_alias + '_' + dataset_name
                         + '_representations'),
            [perception_rep_all, speed_rep_all, intentions_rep_all])
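
# A minimal usage sketch for the representation-extraction function above. The values are
# placeholders for illustration only (not taken from this file); it assumes that
# COIL_DATASET_PATH points at the directory containing <dataset_name> and that a matching
# configs/<exp_batch>/<exp_alias>.yaml exists.
#
#   os.environ["COIL_DATASET_PATH"] = "/data/coil_datasets"
#   execute(gpu='0', exp_batch='sample', exp_alias='coil_icra',
#           dataset_name='CoILVal1', validation_set=True)
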
def execute(gpu, exp_batch, exp_alias, drive_conditions, params):
    """
    Main loop function. Executes driving benchmarks at the specified iterations.

    Args:
        gpu: GPU to run the benchmark on
        exp_batch: folder under configs/ and _logs/ that contains the experiment
        exp_alias: name of the experiment inside exp_batch
        drive_conditions: driving conditions, formatted as <ExperimentSuite>_<Town>
        params: dictionary of driving parameters (e.g. 'suppress_output'); also forwarded
            to driving_benchmark

    Returns:

    """
    try:
        print("Running ", __file__, " On GPU ", gpu, "of experiment name ", exp_alias)
        os.environ["CUDA_VISIBLE_DEVICES"] = gpu

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        merge_with_yaml(os.path.join('configs', exp_batch, exp_alias + '.yaml'))

        exp_set_name, town_name = drive_conditions.split('_')

        experiment_suite_module = __import__('drive.suites.' + camelcase_to_snakecase(exp_set_name)
                                             + '_suite', fromlist=[exp_set_name])
        experiment_suite_module = getattr(experiment_suite_module, exp_set_name)
        experiment_set = experiment_suite_module()

        set_type_of_process('drive', drive_conditions)

        if params['suppress_output']:
            sys.stdout = open(os.path.join('_output_logs',
                                           g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                              "a", buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                                           exp_alias + '_err_' + g_conf.PROCESS_NAME
                                           + '_' + str(os.getpid()) + ".out"),
                              "a", buffering=1)

        coil_logger.add_message('Loading',
                                {'Poses': experiment_set.build_experiments()[0].poses})

        if g_conf.USE_ORACLE:
            control_filename = 'control_output_auto'
        else:
            control_filename = 'control_output'

        """
        ##### Preparing the output files that will contain the driving summary #####
        """
        experiment_list = experiment_set.build_experiments()
        # Get all the uniquely named tasks
        task_list = unique([experiment.task_name for experiment in experiment_list])
        # Now actually run the driving_benchmark
        latest = get_latest_evaluated_checkpoint(control_filename + '_' + task_list[0])
        if latest is None:
            # When nothing was tested yet, get_latest_evaluated_checkpoint returns None; we fix that.
            latest = 0

        # The used tasks are hardcoded; this needs to be improved.
        file_base = os.path.join('_logs', exp_batch, exp_alias,
                                 g_conf.PROCESS_NAME + '_csv', control_filename)

        for i in range(len(task_list)):
            # Write the header of the summary file used for the conclusion
            # while the checkpoint is not there.
            write_header_control_summary(file_base, task_list[i])

        """
        ###### Run a single driving benchmark specified by the checkpoint where validation is stale ######
        """
        if g_conf.FINISH_ON_VALIDATION_STALE is not None:
            while validation_stale_point(g_conf.FINISH_ON_VALIDATION_STALE) is None:
                time.sleep(0.1)

            validation_state_iteration = validation_stale_point(g_conf.FINISH_ON_VALIDATION_STALE)

            driving_benchmark(validation_state_iteration, gpu, town_name, experiment_set,
                              exp_batch, exp_alias, params, control_filename, task_list)

        else:
            """
            ##### Main loop, run a benchmark for each specified checkpoint on the "Test Configuration" #####
            """
            while not maximun_checkpoint_reach(latest, g_conf.TEST_SCHEDULE):
                # Get the correct checkpoint.
                # We check it for some task name; all of them are ready at the same time.
                if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE,
                                            control_filename + '_' + task_list[0]):
                    latest = get_next_checkpoint(g_conf.TEST_SCHEDULE,
                                                 control_filename + '_' + task_list[0])

                    driving_benchmark(latest, gpu, town_name, experiment_set, exp_batch,
                                      exp_alias, params, control_filename, task_list)
                else:
                    time.sleep(0.1)

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something happened'})
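
# The dynamic suite import above relies on a camelcase_to_snakecase helper defined elsewhere
# in the repository. The standalone sketch below only illustrates the mapping implied by the
# docstring of the refactored version further down (e.g. 'TestT1' -> 'test_t1',
# 'NocrashNewWeatherTown' -> 'nocrash_new_weather_town'); the repository's own helper may be
# implemented differently.
import re

def _camelcase_to_snakecase_sketch(name: str) -> str:
    # Insert '_' before each interior uppercase letter, then lowercase the result.
    return re.sub(r'(?<!^)(?=[A-Z])', '_', name).lower()

# _camelcase_to_snakecase_sketch('TestT1')             -> 'test_t1'
# _camelcase_to_snakecase_sketch('EccvGeneralization') -> 'eccv_generalization'
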
def execute(gpu: list, exp_folder: str, exp_alias: str, drive_conditions: str,
            suppress_output: bool, docker: str, record_collisions: bool, no_screen: bool):
    """
    Main loop function. Executes driving benchmarks at the specified iterations.

    Args:
        :param gpu: list containing the gpus; gpu[0] will be used
        :param exp_folder: name of the folder in _logs where the trained models are stored
        :param exp_alias: name of the experiment inside exp_folder
        :param drive_conditions: conditions for driving; taken from drive/suites:
            TestT1_Town01                 --> test_t1_suite.py
            TestT2_Town02                 --> test_t2_suite.py
            NocrashTraining_Town01        --> nocrash_training_suite.py
            NocrashNewWeatherTown_Town02  --> nocrash_new_weather_town_suite.py
            NocrashNewWeather_Town01      --> nocrash_new_weather_suite.py
            NocrashNewTown_Town02         --> nocrash_new_town_suite.py
            EccvTraining_Town01           --> eccv_training_suite.py
            EccvGeneralization_Town02     --> eccv_generalization_suite.py
            CorlTraining_Town01           --> corl_training_suite.py
            CorlNewWeatherTown_Town02     --> corl_new_weather_town_suite.py
            CorlNewWeather_Town01         --> corl_new_weather_suite.py
            CorlNewTown_Town02            --> corl_new_town_suite.py
        :param suppress_output: redirect output to files in '_output_logs'
        :param docker: name of the docker image
        :param record_collisions:
        :param no_screen:

    Returns:

    """
    try:
        print(f"Running {__file__} on GPU {gpu[0]} of experiment name {exp_alias}")
        os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu[0])

        if not os.path.exists('_output_logs'):
            os.mkdir('_output_logs')

        merge_with_yaml(os.path.join('configs', exp_folder, f'{exp_alias}.yaml'))

        # 'TestT1_Town01' -> 'TestT1', 'Town01'
        exp_set_name, town_name = drive_conditions.split('_')

        # camelcase_to_snakecase('TestT1') -> 'test_t1'; the class 'TestT1' will be imported
        # from 'drive.suites.test_t1_suite'
        experiment_suite_module = __import__(
            f'drive.suites.{camelcase_to_snakecase(exp_set_name)}_suite',
            fromlist=[exp_set_name])
        # Get the suite class from the module and create an instance of it
        experiment_suite_module = getattr(experiment_suite_module, exp_set_name)
        experiment_set = experiment_suite_module()  # e.g. an instance of TestT1

        set_type_of_process('drive', drive_conditions)  # e.g. 'drive_TestT1_Town01'

        if suppress_output:
            sys.stdout = open(os.path.join('_output_logs',
                                           g_conf.PROCESS_NAME + '_' + str(os.getpid()) + ".out"),
                              "a", buffering=1)
            sys.stderr = open(os.path.join('_output_logs',
                                           exp_alias + '_err_' + g_conf.PROCESS_NAME
                                           + '_' + str(os.getpid()) + ".out"),
                              "a", buffering=1)

        coil_logger.add_message('Loading',
                                {'Poses': experiment_set.build_experiments()[0].poses})

        if g_conf.USE_ORACLE:
            control_filename = 'control_output_auto'
        else:
            control_filename = 'control_output'

        """
        ##### Preparing the output files that will contain the driving summary #####
        """
        # Build the experiments from the suite instance
        experiment_list = experiment_set.build_experiments()  # each experiment is created with an empty name
        # Get all the uniquely named tasks
        task_list = unique([experiment.task_name for experiment in experiment_list])
        # Now actually run the driving_benchmark
        latest = get_latest_evaluated_checkpoint(control_filename + '_' + task_list[0])
        if latest is None:
            # When nothing was tested yet, get_latest_evaluated_checkpoint returns None; we fix that.
            latest = 0

        # The used tasks are hardcoded; this needs to be improved.
        file_base = os.path.join('_logs', exp_folder, exp_alias,
                                 f'{g_conf.PROCESS_NAME}_csv', control_filename)

        for i in range(len(task_list)):
            # Write the header of the summary file used for the conclusion
            # while the checkpoint is not there.
            write_header_control_summary(file_base, task_list[i])

        """
        ###### Run a single driving benchmark specified by the checkpoint where validation is stale ######
        """
        if g_conf.FINISH_ON_VALIDATION_STALE is not None:
            while validation_stale_point(g_conf.FINISH_ON_VALIDATION_STALE) is None:
                time.sleep(0.1)

            validation_state_iteration = validation_stale_point(g_conf.FINISH_ON_VALIDATION_STALE)

            driving_benchmark(validation_state_iteration, gpu, town_name, experiment_set,
                              exp_folder, exp_alias, docker, control_filename, task_list)

        else:
            """
            ##### Main loop, run a benchmark for each specified checkpoint on the "Test Configuration" #####
            """
            while not maximum_checkpoint_reached(latest):
                # Get the correct checkpoint.
                # We check it for some task name; all of them are ready at the same time.
                if is_next_checkpoint_ready(g_conf.TEST_SCHEDULE,
                                            control_filename + '_' + task_list[0]):
                    latest = get_next_checkpoint(g_conf.TEST_SCHEDULE,
                                                 control_filename + '_' + task_list[0])

                    driving_benchmark(latest, gpu, town_name, experiment_set, exp_folder,
                                      exp_alias, docker, control_filename, task_list)
                else:
                    time.sleep(0.1)

        coil_logger.add_message('Finished', {})

    except KeyboardInterrupt:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Killed By User'})

    except:
        traceback.print_exc()
        coil_logger.add_message('Error', {'Message': 'Something happened'})
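
# A minimal invocation sketch for the refactored signature above. The CLI flag names, the
# default experiment names, and the docker image tag are assumptions chosen for illustration;
# they are not confirmed by this file.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Run a driving benchmark for one experiment.')
    parser.add_argument('--gpus', nargs='+', default=['0'], help='GPU ids; only gpus[0] is used')
    parser.add_argument('--folder', default='sample', help='experiment folder under configs/ and _logs/')
    parser.add_argument('--exp', default='coil_icra', help='experiment alias inside the folder')
    parser.add_argument('--drive-conditions', default='TestT1_Town01')
    parser.add_argument('--docker', default='carlasim/carla:0.8.4', help='docker image running CARLA')
    args = parser.parse_args()

    execute(gpu=args.gpus, exp_folder=args.folder, exp_alias=args.exp,
            drive_conditions=args.drive_conditions, suppress_output=False,
            docker=args.docker, record_collisions=False, no_screen=False)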