def tests_for_all_extract(self):
    """Run hvc.extract on every extract test config and sanity-check the output.

    Every file matching ``test_extract_*.config.yml`` under
    ``<configs>/test_data/config.yml/`` is passed to ``hvc.extract`` (after
    making sure the working directory is ``homedir``).  The config is then
    parsed, and for each entry of its ``todo_list`` the most recently
    modified ``*extract_output*`` directory inside ``todo['output_dir']`` is
    inspected:

    * if any ``features_from*`` file contains a ``'features'`` entry, all of
      them must; each feature array must have as many rows as there are
      labels, and all arrays must have the same number of columns;
    * if any file contains ``'neuralnets_input_dict'``, all of them must;
    * exactly one ``summary_feature_file_*`` must exist and be loadable.

    The working directory that was in effect on entry is restored on exit,
    so the ``os.chdir`` calls made here do not leak into other tests.
    """
    search_path = os.path.join(
        configs,
        os.path.normpath('test_data/config.yml/'
                         'test_extract_*.config.yml'))
    # NOTE(review): if this glob matches nothing, the loop body never runs
    # and the test passes without checking anything.
    extract_config_files = glob.glob(search_path)

    # Remember where we started so the chdir calls below can be undone.
    try:
        start_dir = os.getcwd()
    except OSError:
        # cwd no longer exists (e.g. removed by an earlier test)
        start_dir = None

    try:
        for extract_config_file in extract_config_files:
            if os.getcwd() != homedir:
                os.chdir(homedir)
            hvc.extract(extract_config_file)
            extract_config = hvc.parse_config(extract_config_file, 'extract')

            for todo in extract_config['todo_list']:
                # switch to test dir
                os.chdir(todo['output_dir'])
                extract_outputs = sorted(
                    filter(os.path.isdir, glob.glob('*extract_output*')),
                    key=os.path.getmtime)
                assert extract_outputs, (
                    'no *extract_output* directory found in '
                    + str(todo['output_dir']))
                os.chdir(extract_outputs[-1])  # most recent

                ftr_dicts = [joblib.load(ftr_file)
                             for ftr_file in glob.glob('features_from*')]

                has_features = ['features' in ftr_dict
                                for ftr_dict in ftr_dicts]
                if any(has_features):
                    assert all(has_features)
                    for ftr_dict in ftr_dicts:
                        labels = ftr_dict['labels']
                        features = ftr_dict['features']
                        assert features.shape[0] == len(labels)

                    # make sure number of features, i.e. columns, is
                    # constant across feature matrices
                    ftr_cols = [ftr_dict['features'].shape[1]
                                for ftr_dict in ftr_dicts]
                    assert np.unique(ftr_cols).shape[-1] == 1

                has_nn_input = ['neuralnets_input_dict' in ftr_dict
                                for ftr_dict in ftr_dicts]
                if any(has_nn_input):
                    assert all(has_nn_input)

                summary_file = glob.glob('summary_feature_file_*')
                # (should only be one summary file)
                assert len(summary_file) == 1
                # Loading confirms the summary file is readable.
                # TODO: the intended check that the summary's feature rows
                # equal the sum of rows across the per-file features is not
                # implemented here.
                joblib.load(summary_file[0])
    finally:
        if start_dir is not None and os.path.isdir(start_dir):
            os.chdir(start_dir)
def _yaml_config_asserts(self, extract_yaml_config_file, tmp_output_dir):
    """Helper called by the tests below: run hvc.extract and check its output.

    ``tmp_output_dir`` has to be put into the yaml file, so the config is
    first passed through ``rewrite_config`` together with a ``replace_dict``
    for the ``'output_dir'`` key.  ``hvc.extract`` is run on the rewritten
    config, which is then parsed; for each entry of ``todo_list`` the most
    recently modified ``*extract_output*`` directory inside
    ``todo['output_dir']`` is inspected:

    * if any ``features_from*`` file contains a ``'features'`` entry, all of
      them must; each feature array must have as many rows as there are
      labels, and all arrays must have the same number of columns;
    * if any file contains ``'neuralnets_input_dict'``, all of them must.

    The working directory that was in effect on entry is restored on exit,
    so callers are not left inside a temporary output directory.
    """
    # presumably (text to find in the yaml file, replacement) -- verify
    # against rewrite_config
    replace_dict = {'output_dir':
                    ('replace with tmp_output_dir', str(tmp_output_dir))}
    extract_config_rewritten = rewrite_config(extract_yaml_config_file,
                                              tmp_output_dir,
                                              replace_dict)

    # Remember where we started so the chdir calls below can be undone.
    try:
        start_dir = os.getcwd()
    except OSError:
        # cwd no longer exists (e.g. a temp dir removed by an earlier test)
        start_dir = None

    try:
        hvc.extract(extract_config_rewritten)
        extract_config = hvc.parse_config(extract_config_rewritten, 'extract')

        for todo in extract_config['todo_list']:
            os.chdir(todo['output_dir'])
            extract_outputs = sorted(
                filter(os.path.isdir, glob('*extract_output*')),
                key=os.path.getmtime)
            assert extract_outputs, (
                'no *extract_output* directory found in '
                + str(todo['output_dir']))
            os.chdir(extract_outputs[-1])  # most recent

            ftr_dicts = [joblib.load(ftr_file)
                         for ftr_file in glob('features_from*')]

            has_features = ['features' in ftr_dict for ftr_dict in ftr_dicts]
            if any(has_features):
                assert all(has_features)
                for ftr_dict in ftr_dicts:
                    labels = ftr_dict['labels']
                    features = ftr_dict['features']
                    assert features.shape[0] == len(labels)

                # make sure number of features, i.e. columns, is constant
                # across feature matrices
                ftr_cols = [ftr_dict['features'].shape[1]
                            for ftr_dict in ftr_dicts]
                assert np.unique(ftr_cols).shape[-1] == 1

            has_nn_input = ['neuralnets_input_dict' in ftr_dict
                            for ftr_dict in ftr_dicts]
            if any(has_nn_input):
                assert all(has_nn_input)
    finally:
        if start_dir is not None and os.path.isdir(start_dir):
            os.chdir(start_dir)
def _yaml_config_asserts(self, select_yaml_config_path, tmp_output_dir,
                         feature_file):
    """Helper with assertions shared by all tests for hvc.select run with
    config.yml files.

    The config at ``select_yaml_config_path`` is passed through
    ``rewrite_config`` with replacements for ``'feature_file'`` and
    ``'output_dir'``, then ``hvc.select`` is run on the result.  Summary
    files matching ``select_output*/summary_model_select_file*`` under
    ``tmp_output_dir`` are listed before and after the run; exactly one new
    file must appear.  The directory holding it must contain one
    sub-directory per model in the parsed config, named by
    ``determine_model_output_folder_name``.

    Returns True when every assertion passes.
    """
    output_root = str(tmp_output_dir)
    substitutions = {
        'feature_file': ('replace with feature_file', feature_file),
        'output_dir': ('replace with tmp_output_dir', output_root),
    }
    select_config_rewritten = rewrite_config(select_yaml_config_path,
                                             tmp_output_dir,
                                             replace_dict=substitutions)

    summary_pattern = os.path.join(output_root,
                                   'select_output*',
                                   'summary_model_select_file*')
    summaries_before = glob(summary_pattern)
    hvc.select(select_config_rewritten)
    summaries_after = glob(summary_pattern)

    new_summaries = [path for path in summaries_after
                     if path not in summaries_before]
    # should only be one summary output file
    assert len(new_summaries) == 1

    # now check for every model in config
    # if there is corresponding folder with model files etc
    parsed_config = hvc.parse_config(select_config_rewritten, 'select')
    run_dir = os.path.dirname(new_summaries[0])
    # first item yielded by os.walk describes run_dir itself;
    # keep just its dir names
    _, model_dirs, _ = next(os.walk(run_dir))
    expected_dirs = [determine_model_output_folder_name(model_dict)
                     for model_dict in parsed_config['models']]
    for expected in expected_dirs:
        assert expected in model_dirs

    return True
def check_select_output(config_path, output_dir):
    """Assert that ``output_dir`` holds the expected output of a select run.

    Exactly one file matching ``summary_model_select_file*`` must exist
    directly inside ``output_dir``, and for every model in the 'select'
    config parsed from ``config_path`` there must be a sub-directory of
    ``output_dir`` named by ``determine_model_output_folder_name``.

    Returns True when every assertion passes.
    """
    summary_pattern = os.path.join(str(output_dir),
                                   'summary_model_select_file*')
    summary_files = glob(summary_pattern)
    # should only be one summary output file
    assert len(summary_files) == 1

    # now check for every model in config
    # if there is corresponding folder with model files etc
    parsed_config = hvc.parse_config(config_path, 'select')
    # first item yielded by os.walk describes output_dir itself;
    # keep just its dir names
    _, model_dirs, _ = next(os.walk(output_dir))
    expected_dirs = [determine_model_output_folder_name(model_dict)
                     for model_dict in parsed_config['models']]
    for expected in expected_dirs:
        assert expected in model_dirs

    return True