import json
import os
import sys
from pathlib import Path

import wandb
from dpu_utils.utils import RichPath

# NOTE: project-local helpers used below (test, model_restore_helper, run_train,
# make_run_id, git_tag_run, compute_evaluation_metrics) are assumed to be imported
# elsewhere in this repo; only standard-library and known third-party imports are listed here.


def run(arguments):
    azure_info_path = arguments.get('--azure-info', None)

    # if you do not pass arguments for valid/test data, default to files checked into the repo.
    if not arguments['VALID_DATA_PATH']:
        dir_path = Path(__file__).parent.absolute()
        print(dir_path)
        arguments['VALID_DATA_PATH'] = str(dir_path / 'data_dirs_valid.txt')
        arguments['TEST_DATA_PATH'] = str(dir_path / 'data_dirs_test.txt')

    valid_data_dirs = test.expand_data_path(arguments['VALID_DATA_PATH'], azure_info_path)
    test_data_dirs = test.expand_data_path(arguments['TEST_DATA_PATH'], azure_info_path)

    test.compute_evaluation_metrics(
        RichPath.create(arguments['MODEL_PATH'], azure_info_path=azure_info_path),
        arguments, azure_info_path, valid_data_dirs, test_data_dirs)
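# Hedged usage sketch (not part of the original repo): the evaluation-only run() above
# expects a docopt-style arguments mapping like the one built here. The model path is a
# hypothetical placeholder; passing None for the data paths falls back to the
# data_dirs_*.txt files checked in next to this file.
def _example_evaluate_saved_model(model_path: str) -> None:
    run({
        '--azure-info': None,
        'MODEL_PATH': model_path,   # e.g. a trained model checkpoint produced by training
        'VALID_DATA_PATH': None,
        'TEST_DATA_PATH': None,
    })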
def run(arguments, tag_in_vcs=False) -> None:
    azure_info_path = arguments.get('--azure-info', None)
    testrun = arguments.get('--testrun')
    no_eval = arguments.get('--no-eval')
    max_files_per_dir = arguments.get('--max-files-per-dir')

    dir_path = Path(__file__).parent.absolute()

    # if you do not pass arguments for train/valid/test data, default to files checked into the repo.
    if not arguments['TRAIN_DATA_PATH']:
        arguments['TRAIN_DATA_PATH'] = str(dir_path / 'data_dirs_train.txt')
        arguments['VALID_DATA_PATH'] = str(dir_path / 'data_dirs_valid.txt')
        arguments['TEST_DATA_PATH'] = str(dir_path / 'data_dirs_test.txt')

    train_data_dirs = test.expand_data_path(arguments['TRAIN_DATA_PATH'], azure_info_path)
    valid_data_dirs = test.expand_data_path(arguments['VALID_DATA_PATH'], azure_info_path)
    test_data_dirs = test.expand_data_path(arguments['TEST_DATA_PATH'], azure_info_path)

    # default model save location
    if not arguments['SAVE_FOLDER']:
        arguments['SAVE_FOLDER'] = str(dir_path.parent / 'resources/saved_models/')
    save_folder = arguments['SAVE_FOLDER']

    model_class = model_restore_helper.get_model_class_from_name(arguments['--model'])
    hyperparameters = model_class.get_default_hyperparameters()

    hyperparameters['max_epochs'] = int(arguments.get('--max-num-epochs'))
    if testrun:
        hyperparameters['max_epochs'] = 2
        if not max_files_per_dir:
            max_files_per_dir = 1

    # override hyperparams if flag is passed
    hypers_override = arguments.get('--hypers-override')
    if hypers_override is not None:
        hyperparameters.update(json.loads(hypers_override))
    elif arguments.get('--hypers-override-file') is not None:
        with open(arguments.get('--hypers-override-file')) as f:
            hyperparameters.update(json.load(f))

    os.makedirs(save_folder, exist_ok=True)

    if tag_in_vcs:
        # a fresh run id is created for each wandb run below; tag the commit with an initial id here.
        hyperparameters['git_commit'] = git_tag_run(make_run_id(arguments))

    # turns off wandb if you don't want to log anything
    if arguments.get('--dryrun'):
        os.environ["WANDB_MODE"] = 'dryrun'

    results = []
    num_random_samples = int(arguments['--num-random-samples'])
    if num_random_samples > 1:
        random_data_dir = str(train_data_dirs[0]) + arguments['--run-name']
    else:
        random_data_dir = None

    for i in range(num_random_samples):
        # make name of wandb run = run_id (doesn't populate yet)
        run_name = make_run_id(arguments)

        # save hyperparams to wandb logging; sets must be filtered out because they are not JSON serializable
        wandb.init(name=run_name,
                   config={k: v for k, v in hyperparameters.items() if not isinstance(v, set)})
        wandb.config.update({'model-class': arguments['--model'],
                             'train_folder': str(train_data_dirs),
                             'valid_folder': str(valid_data_dirs),
                             'save_folder': str(save_folder),
                             'test_folder': str(test_data_dirs),
                             'CUDA_VISIBLE_DEVICES': os.environ.get("CUDA_VISIBLE_DEVICES", 'Not Set'),
                             'run-name': arguments.get('--run-name'),
                             'CLI-command': ' '.join(sys.argv)})

        if arguments.get('--evaluate-model'):
            model_path = RichPath.create(arguments['--evaluate-model'])
        else:
            model_path = run_train(model_class, train_data_dirs, valid_data_dirs, save_folder,
                                   hyperparameters, azure_info_path, run_name, arguments['--quiet'],
                                   max_files_per_dir=max_files_per_dir,
                                   parallelize=not arguments['--sequential'],
                                   random_sample_size=int(arguments['--random-sample-size']),
                                   random_data_dir_name=random_data_dir)

        if num_random_samples == 1:
            wandb.config['best_model_path'] = str(model_path)
            wandb.save(str(model_path.to_local_path()))

        if no_eval:
            continue
        # only limit files in the test run if the `--testrun` flag is passed by the user.
        elif testrun:
            compute_evaluation_metrics(model_path, arguments, azure_info_path,
                                       valid_data_dirs, test_data_dirs, max_files_per_dir)
        else:
            compute_evaluation_metrics(model_path, arguments, azure_info_path,
                                       valid_data_dirs, test_data_dirs)
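# Hedged sketch of a CLI entry point (an assumption, not copied verbatim from the repo):
# the arguments dict consumed by run() looks like the mapping docopt builds from a usage
# docstring, so a typical driver would resemble the block below. run_and_debug (from
# dpu_utils) simply wraps the call so that a --debug flag drops into a debugger on failure.
if __name__ == '__main__':
    from docopt import docopt
    from dpu_utils.utils import run_and_debug

    args = docopt(__doc__)  # assumes a docopt usage string in this module's docstring
    run_and_debug(lambda: run(args, tag_in_vcs=args.get('--tag-in-vcs', False)),
                  args.get('--debug', False))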