def launch_train(args): assert args.config is not None, "Argument --config must be specified. Run 'python main.py --help' for help on arguments." config = run_utils.load_config(args.config) if config is None: print_utils.print_error( "ERROR: cannot continue without a config file. Exiting now...") sys.exit() config["runs_dirpath"] = args.runs_dirpath if args.run_name is not None: config["run_name"] = args.run_name config["new_run"] = args.new_run config["init_run_name"] = args.init_run_name if args.samples is not None: config["samples"] = args.samples if args.batch_size is not None: config["optim_params"]["batch_size"] = args.batch_size if args.max_epoch is not None: config["optim_params"]["max_epoch"] = args.max_epoch if args.fold is None: if "fold" in config: fold = set(config["fold"]) else: fold = {"train"} # Default values for train else: fold = set(args.fold) assert fold == {"train"} or fold == {"train", "val"}, \ "Argument fold when training should be either: ['train'] or ['train', 'val']" config["fold"] = list(fold) print_utils.print_info("Training on fold(s): {}".format(config["fold"])) config["nodes"] = args.nodes config["gpus"] = args.gpus config["nr"] = args.nr config["world_size"] = args.gpus * args.nodes # --- Load params in config set as relative path to another JSON file config = run_utils.load_defaults_in_config( config, filepath_key="defaults_filepath") # Setup num_workers per process: if config["num_workers"] is None: config["num_workers"] = int(torch.multiprocessing.cpu_count() / config["gpus"]) # --- Distributed init: os.environ['MASTER_ADDR'] = args.master_addr os.environ['MASTER_PORT'] = args.master_port manager = torch.multiprocessing.Manager() shared_dict = manager.dict() shared_dict["run_dirpath"] = None shared_dict["init_checkpoints_dirpath"] = None barrier = manager.Barrier(args.gpus) torch.multiprocessing.spawn(train_process, nprocs=args.gpus, args=(config, shared_dict, barrier))
def launch_inference_from_filepath(args): from frame_field_learning.inference_from_filepath import inference_from_filepath # --- First step: figure out what run (experiment) is to be evaluated # Option 1: the run_name argument is given in which case that's our run run_name = None config = None if args.run_name is not None: run_name = args.run_name # Else option 2: Check if a config has been given to look for the run_name if args.config is not None: config = run_utils.load_config(args.config) if config is not None and "run_name" in config and run_name is None: run_name = config["run_name"] # Else abort... if run_name is None: print_utils.print_error( "ERROR: the run to evaluate could no be identified with the given arguments. " "Please specify either the --run_name argument or the --config argument " "linking to a config file that has a 'run_name' field filled with the name of " "the run name to evaluate.") sys.exit() # --- Second step: get path to the run and if --config was not specified, load the config from the run's folder run_dirpath = frame_field_learning.local_utils.get_run_dirpath( args.runs_dirpath, run_name) if config is None: config = run_utils.load_config(config_dirpath=run_dirpath) if config is None: print_utils.print_error( f"ERROR: the default run's config file at {run_dirpath} could not be loaded. " f"Exiting now...") sys.exit() # --- Add command-line arguments if args.batch_size is not None: config["optim_params"]["batch_size"] = args.batch_size if args.eval_batch_size is not None: config["optim_params"]["eval_batch_size"] = args.eval_batch_size else: config["optim_params"][ "eval_batch_size"] = 2 * config["optim_params"]["batch_size"] # --- Load params in config set as relative path to another JSON file config = run_utils.load_defaults_in_config( config, filepath_key="defaults_filepath") config["eval_params"]["run_dirpath"] = run_dirpath if args.eval_patch_size is not None: config["eval_params"]["patch_size"] = args.eval_patch_size if args.eval_patch_overlap is not None: config["eval_params"]["patch_overlap"] = args.eval_patch_overlap backbone = get_backbone(config["backbone_params"]) inference_from_filepath(config, args.in_filepath, backbone, args.out_dirpath)
def launch_eval_coco(args): # --- Init: fills mode-specific default command-line arguments if args.fold is None: fold = {"test"} # Default value for eval_coco else: fold = set(args.fold) assert len(fold) == 1, \ "Argument fold when evaluating with COCO should be a single fold" # --- Find which run and which config file to evaluate the run with if args.run_name is None and args.config is None: print_utils.print_error( "ERROR: At least of one --run_name or --config has to be specified." ) sys.exit() elif args.run_name is None and args.config is not None: # Load config config = run_utils.load_config(args.config) # Verify it has a run_name specified if "run_name" not in config: print_utils.print_error( "ERROR: run_name was not found in the provided config file, you can specify it with --run_name" ) sys.exit() run_name = config["run_name"] elif args.run_name is not None and args.config is None: # Load run_name's config run_dirpath = frame_field_learning.local_utils.get_run_dirpath( args.runs_dirpath, args.run_name) config = run_utils.load_config(config_dirpath=run_dirpath) run_name = args.run_name else: # Load specified config and use specified run_name config = run_utils.load_config(args.config) run_name = args.run_name # --- Load params in config set as relative path to another JSON file config = run_utils.load_defaults_in_config( config, filepath_key="defaults_filepath") # --- Second step: Replace parameters in config file from command-line arguments config["eval_params"]["run_name"] = run_name if args.samples is not None: config["samples"] = args.samples config["fold"] = list(fold) # Setup num_workers per process: if config["num_workers"] is None: config["num_workers"] = torch.multiprocessing.cpu_count() eval_coco(config)
def launch_eval(args): # --- Init: fills mode-specific default command-line arguments if args.fold is None: fold = {"test"} # Default value for eval mode else: fold = set(args.fold) assert len(fold) == 1, "Argument 'fold' must be a single fold in eval mode" # --- First step: figure out what run (experiment) is to be evaluated # Option 1: the run_name argument is given in which case that's our run run_name = None config = None if args.run_name is not None: run_name = args.run_name # Else option 2: Check if a config has been given to look for the run_name if args.config is not None: config = run_utils.load_config(args.config) if config is not None and "run_name" in config and run_name is None: run_name = config["run_name"] # Else abort... if run_name is None: print_utils.print_error( "ERROR: the run to evaluate could no be identified with the given arguments. " "Please specify either the --run_name argument or the --config argument " "linking to a config file that has a 'run_name' field filled with the name of " "the run name to evaluate.") sys.exit() # --- Second step: get path to the run and if --config was not specified, load the config from the run's folder run_dirpath = frame_field_learning.local_utils.get_run_dirpath( args.runs_dirpath, run_name) if config is None: config = run_utils.load_config(config_dirpath=run_dirpath) if config is None: print_utils.print_error( f"ERROR: the default run's config file at {run_dirpath} could not be loaded. " f"Exiting now...") sys.exit() # --- Third step: Replace parameters in config file from command-line arguments if args.dataset_params is not None: config["dataset_params"] = python_utils.load_json(args.dataset_params) if args.samples is not None: config["samples"] = args.samples if args.batch_size is not None: config["optim_params"]["batch_size"] = args.batch_size if args.eval_batch_size is not None: config["optim_params"]["eval_batch_size"] = args.eval_batch_size else: config["optim_params"][ "eval_batch_size"] = 2 * config["optim_params"]["batch_size"] config["fold"] = list(fold) config["nodes"] = args.nodes config["gpus"] = args.gpus config["nr"] = args.nr config["world_size"] = args.gpus * args.nodes # --- Load params in config set as relative path to another JSON file config = run_utils.load_defaults_in_config( config, filepath_key="defaults_filepath") config["eval_params"]["run_dirpath"] = run_dirpath if args.eval_patch_size is not None: config["eval_params"]["patch_size"] = args.eval_patch_size if args.eval_patch_overlap is not None: config["eval_params"]["patch_overlap"] = args.eval_patch_overlap # Setup num_workers per process: if config["num_workers"] is None: config["num_workers"] = int(torch.multiprocessing.cpu_count() / config["gpus"]) # --- Distributed init: os.environ['MASTER_ADDR'] = args.master_addr os.environ['MASTER_PORT'] = args.master_port manager = torch.multiprocessing.Manager() shared_dict = manager.dict() shared_dict["name_list"] = manager.list() shared_dict["iou_list"] = manager.list() shared_dict["seg_coco_list"] = manager.list() shared_dict["poly_coco_list"] = manager.list() barrier = manager.Barrier(args.gpus) torch.multiprocessing.spawn(eval_process, nprocs=args.gpus, args=(config, shared_dict, barrier))