def main(args: Namespace, config: AttrDict):
    # setup logging
    setup_logging(__name__)

    # print the configuration used
    print_cfg(config)

    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=config)

    # extract the features
    launch_distributed(
        config,
        args.node_id,
        engine_name="extract_features",
        hook_generator=default_hook_generator,
    )

    # Get the names of the features that we are extracting. If the user doesn't
    # specify the features to evaluate, we use the full model output and freeze
    # both head and trunk as a precaution.
    feat_names = get_trunk_output_feature_names(config.MODEL)
    if len(feat_names) == 0:
        feat_names = ["heads"]

    for layer in feat_names:
        top1, top5 = nearest_neighbor_test(config, layer_name=layer)
        logging.info(f"layer: {layer} Top1: {top1}, Top5: {top5}")

    # close the logging streams including the file handlers
    shutdown_logging()

def main(args: Namespace, config: AttrDict):
    # setup logging
    setup_logging(__name__)

    # print the configuration used
    print_cfg(config)

    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=config)

    # Extract the features if no path to already extracted features is provided
    if not config.NEAREST_NEIGHBOR.FEATURES.PATH:
        launch_distributed(
            config,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
        config.NEAREST_NEIGHBOR.FEATURES.PATH = get_checkpoint_folder(config)

    # Run KNN on all the extracted features
    run_knn_at_all_layers(config)

    # close the logging streams including the file handlers
    shutdown_logging()

def hydra_main(overrides: List[Any]):
    ######################################################################################
    # DO NOT MOVE THIS IMPORT TO TOP LEVEL: submitit processes will not be initialized
    # correctly (MKL_THREADING_LAYER will be set to INTEL instead of GNU)
    ######################################################################################
    from vissl.hooks import default_hook_generator

    ######################################################################################
    print(f"####### overrides: {overrides}")
    with initialize_config_module(config_module="vissl.config"):
        cfg = compose("defaults", overrides=overrides)
    args, config = convert_to_attrdict(cfg)

    if config.SLURM.USE_SLURM:
        assert (
            is_submitit_available()
        ), "Please 'pip install submitit' to schedule jobs on SLURM"
        launch_distributed_on_slurm(engine_name=args.engine_name, cfg=config)
    else:
        launch_distributed(
            cfg=config,
            node_id=args.node_id,
            engine_name=args.engine_name,
            hook_generator=default_hook_generator,
        )

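# Hypothetical entry-point sketch (not part of the function above): the Hydra
# overrides are typically forwarded straight from the command line, e.g.
# `python run_distributed_engines.py config=pretrain/simclr/simclr_8node_resnet`
# (the config path is an illustrative assumption).
if __name__ == "__main__":
    import sys

    hydra_main(overrides=sys.argv[1:])
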
def main(args: Namespace, cfg: AttrDict):
    setup_logging(__name__, output_dir=get_checkpoint_folder(cfg))

    # Extract the features if feature extraction is enabled
    if cfg.CLUSTERFIT.FEATURES.EXTRACT:

        # We cannot have automatic extraction with more than 1 node, or otherwise
        # we would have to run this script on several nodes and thus have several
        # parallel clusterings of the features. The automatic extraction is only
        # there as a shortcut when running on a single node
        assert (
            cfg.DISTRIBUTED.NUM_NODES == 1
        ), "Automatic extraction can only work with 1 node"

        # Make sure to dump the features at the desired path
        cfg.CHECKPOINT.DIR = cfg.CLUSTERFIT.FEATURES.PATH
        cfg.CHECKPOINT.APPEND_DISTR_RUN_ID = False

        # Run the extraction of features
        set_env_vars(local_rank=0, node_id=0, cfg=cfg)
        logging.info("Setting seed....")
        set_seeds(cfg, args.node_id)
        launch_distributed(
            cfg,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )

    # Otherwise, set up the path manager explicitly (in the feature extraction
    # branch above, this is already done inside set_env_vars)
    else:
        setup_path_manager()

    cluster_features(cfg)
    shutdown_logging()

def run_integration_test(
    config: AttrDict, engine_name: str = "train"
) -> IntegrationTestLogs:
    """
    Helper function to run an integration test on a given configuration
    """
    launch_distributed(
        cfg=config,
        node_id=0,
        engine_name=engine_name,
        hook_generator=default_hook_generator,
    )
    return IntegrationTestLogs(run_dir=os.getcwd())

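# Minimal usage sketch (the override path is an assumption, not taken from the
# code above): compose a config with Hydra as in hydra_main, then run the
# integration test and capture its logs from the current working directory.
with initialize_config_module(config_module="vissl.config"):
    cfg = compose("defaults", overrides=["config=test/integration_test/quick_simclr"])
_, config = convert_to_attrdict(cfg)
test_logs = run_integration_test(config, engine_name="train")
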
def extract_low_shot_features(args: Namespace, cfg: AttrDict, output_dir: str):
    dataset_name = cfg["SVM"]["low_shot"]["dataset_name"]
    k_values = cfg["SVM"]["low_shot"]["k_values"]
    sample_inds = cfg["SVM"]["low_shot"]["sample_inds"]
    if "voc" in dataset_name:
        # Extract the features. In the case of voc07 low-shot, we extract features
        # on the full train and test sets; both sets have about 5K images.
        launch_distributed(
            cfg,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
    elif "places" in dataset_name:
        # In the case of Places, since the features size could become large, we
        # need to extract features on smaller subsamples
        data_paths, label_paths = dataset_catalog.get_data_files(
            split="TRAIN", dataset_config=cfg["DATA"]
        )
        targets = load_file(label_paths[0])
        logging.info("Generating low-shot samples for Places205...")
        generate_places_low_shot_samples(
            targets, k_values, sample_inds, output_dir, data_paths[0]
        )
        test_features_extracted = False
        for idx in sample_inds:
            for k in k_values:
                out_img_file = f"{output_dir}/train_images_sample{idx}_k{k}.npy"
                out_lbls_file = f"{output_dir}/train_labels_sample{idx}_k{k}.npy"
                cfg.DATA.TRAIN.DATA_PATHS = [out_img_file]
                cfg.DATA.TRAIN.LABEL_PATHS = [out_lbls_file]
                cfg.CHECKPOINT.DIR = f"{output_dir}/sample{idx}_k{k}"
                logging.info(
                    f"Extracting features for places low shot: sample{idx}_k{k}"
                )
                # We want to extract the test features only once, since the test
                # features are shared across all low-shot setups.
                if test_features_extracted:
                    cfg.TEST_MODEL = False
                launch_distributed(
                    cfg,
                    args.node_id,
                    engine_name="extract_features",
                    hook_generator=default_hook_generator,
                )
                test_features_extracted = True
        # set the test model to True again after feature extraction is done
        cfg.TEST_MODEL = True
    else:
        raise RuntimeError(f"Dataset not recognised: {dataset_name}")

def main(args: Namespace, config: AttrDict):
    # setup logging
    setup_logging(__name__, output_dir=get_checkpoint_folder(config))

    # print the configuration used
    print_cfg(config)

    assert config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON, (
        "Feature eval mode is not ON. Can't run train_svm. "
        "Set config.MODEL.FEATURE_EVAL_SETTINGS.EVAL_MODE_ON=True "
        "in your config or from command line."
    )

    # extract the features
    if not config.SVM_FEATURES_PATH:
        launch_distributed(
            config,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
        config.SVM_FEATURES_PATH = get_checkpoint_folder(config)

    # Get the names of the layers for which we extracted features. If the user
    # doesn't specify the features to evaluate, we use the full model output and
    # freeze both head and trunk as a precaution.
    layers = get_trunk_output_feature_names(config.MODEL)
    if len(layers) == 0:
        layers = ["heads"]

    # train one SVM per layer, each in a separate process
    output_dir = get_checkpoint_folder(config)
    running_tasks = [
        mp.Process(target=train_svm, args=(config, output_dir, layer))
        for layer in layers
    ]
    for running_task in running_tasks:
        running_task.start()
    for running_task in running_tasks:
        running_task.join()

    # collect the mAP stats for all the layers and report
    output_mAP = []
    for layer in layers:
        try:
            ap_file = f"{output_dir}/{layer}/test_ap.npy"
            output_mAP.append(round(100.0 * np.mean(load_file(ap_file)), 3))
        except Exception:
            output_mAP.append(-1)
    logging.info(f"AP for various layers:\n {layers}: {output_mAP}")

    # close the logging streams including the file handlers
    shutdown_logging()

def main(args: Namespace, config: AttrDict, node_id=0):
    config = validate_and_infer_config(config)

    # setup the environment variables
    set_env_vars(local_rank=0, node_id=node_id, cfg=config)

    # setup the logging
    checkpoint_folder = get_checkpoint_folder(config)
    setup_logging(__name__, output_dir=checkpoint_folder, rank=os.environ["RANK"])

    if (
        config.IMG_RETRIEVAL.USE_FEATURE_EXTRACTION_ENGINE
        and not config.IMG_RETRIEVAL.FEATURE_EXTRACTION_DIR
    ):
        # extract the train/database features.
        config = adapt_train_database_extract_config(config, checkpoint_folder)
        logging.info("Beginning to extract features for the database set.")
        launch_distributed(
            config,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )

        # extract the query features.
        config = adapt_query_extract_config(config, checkpoint_folder)
        logging.info("Beginning to extract features for the query set.")
        launch_distributed(
            config,
            args.node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )

    # print the config
    print_cfg(config)

    instance_retrieval_test(args, config)
    logging.info(f"Performance time breakdown:\n{PERF_STATS.report_str()}")

    # close the logging streams including the file handlers
    shutdown_logging()

def run_pretraining(
    self,
    with_fsdp: bool,
    with_activation_checkpointing: bool,
    with_mixed_precision: bool,
):
    with self._in_temporary_directory() as dir_name:
        args, config = self._create_pretraining_config(
            with_fsdp=with_fsdp,
            with_activation_checkpointing=with_activation_checkpointing,
            with_mixed_precision=with_mixed_precision,
        )
        launch_distributed(
            cfg=config,
            node_id=args.node_id,
            engine_name=args.engine_name,
            hook_generator=default_hook_generator,
        )
        return self.capture_losses(os.path.join(dir_name, "log.txt"))

def extract_features_and_run_knn(node_id: int, config: AttrDict):
    setup_logging(__name__)
    print_cfg(config)
    set_env_vars(local_rank=0, node_id=0, cfg=config)

    # Extract the features if no path to already extracted features is provided
    if not config.NEAREST_NEIGHBOR.FEATURES.PATH:
        launch_distributed(
            config,
            node_id,
            engine_name="extract_features",
            hook_generator=default_hook_generator,
        )
        config.NEAREST_NEIGHBOR.FEATURES.PATH = get_checkpoint_folder(config)

    # Run KNN on all the extracted features
    run_knn_at_all_layers(config)

    # close the logging streams including the file handlers
    shutdown_logging()

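# Minimal usage sketch (override names and paths are assumptions): pointing
# NEAREST_NEIGHBOR.FEATURES.PATH at a folder of previously extracted features
# makes the function skip the extraction step and go straight to the k-NN test.
with initialize_config_module(config_module="vissl.config"):
    cfg = compose(
        "defaults",
        overrides=[
            "config=benchmark/nearest_neighbor/eval_resnet_8gpu_in1k_kNN",
            "config.NEAREST_NEIGHBOR.FEATURES.PATH=/path/to/extracted_features",
        ],
    )
_, config = convert_to_attrdict(cfg)
extract_features_and_run_knn(node_id=0, config=config)
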
def main(args: Namespace, config: AttrDict):
    # setup logging
    setup_logging(__name__)

    # print the configuration used
    print_cfg(config)

    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=config)

    # extract the label predictions on the test set
    launch_distributed(
        config,
        args.node_id,
        engine_name="extract_label_predictions",
        hook_generator=default_hook_generator,
    )

    # run the geolocalization benchmark on the extracted predictions
    geolocalization_test(config)

    # close the logging streams including the file handlers
    shutdown_logging()

def main(args: Namespace, cfg: AttrDict):
    # setup logging
    setup_logging(__name__)

    # setup the environment variables
    set_env_vars(local_rank=0, node_id=0, cfg=cfg)

    # set seeds
    logging.info("Setting seed....")
    set_seeds(cfg, args.node_id)

    # extract the features (feature extraction is enabled here as well)
    launch_distributed(
        cfg,
        args.node_id,
        engine_name="extract_features",
        hook_generator=default_hook_generator,
    )

    # cluster the extracted features
    cluster_features_and_label(args, cfg)

    # close the logging streams including the file handlers
    shutdown_logging()