def test_boolean_values(self):
    """Boolean values should yield a two-entry dictionary labelled 0 and 1."""
    modality = 'test_bool_values'
    generator = Dictionary_Generator()
    observed = pd.Series([True, False, True])
    generator.append_values(modality_name=modality, values=observed)

    dictionary = generator.get(modality_name=modality)

    # Two distinct values were appended -> labels 0 and 1
    expected_labels = np.arange(2)
    expected_names = np.array([True, False])
    npt.assert_equal(dictionary['label'], expected_labels)
    npt.assert_equal(dictionary['name'], expected_names)
def test_string_values(self):
    """String values appended in two batches should be merged into one dictionary."""
    modality = 'test_str_values'
    generator = Dictionary_Generator()
    first_batch = pd.Series(['a', 'b', 'c'])
    second_batch = pd.Series(['a', 'b', 'd'])

    # append_values is used fluently: the second call is made on the
    # return value of the first one.
    (
        generator
        .append_values(modality_name=modality, values=first_batch)
        .append_values(modality_name=modality, values=second_batch)
    )

    dictionary = generator.get(modality_name=modality)

    # Four distinct names over both batches -> labels 0..3
    expected_labels = np.arange(4)
    expected_names = np.array(['a', 'b', 'c', 'd'])
    npt.assert_equal(dictionary['label'], expected_labels)
    npt.assert_equal(dictionary['name'], expected_names)
def test_values_not_in_suggested(self):
    """
    A value missing from the suggested dictionaries ('i') should make
    `get` raise IndexError, unless `action_on_missing='silent'` is passed.
    """
    modality = 'test_not_in_suggested'
    suggested_names = {
        'a': ['a', 'b', 'e'],
        'b': ['a', 'b', 'c', 'd'],
    }
    # One fake dictionary file per dataset, keyed by the name the mock expects
    fake_files = {
        FileManagerMock.get_dictionary_name(
            dataset_name=dataset,
            modality_name=modality,
        ): build_empty_dictionary(names)
        for dataset, names in suggested_names.items()
    }
    file_manager = FileManagerMock(fake_values=fake_files)

    generator = Dictionary_Generator()
    (
        generator
        .append_suggested_dictionary(dataset_name='a', modality_name=modality, FMSingleton=file_manager)
        .append_suggested_dictionary(dataset_name='b', modality_name=modality, FMSingleton=file_manager)
    )
    (
        generator
        .append_values(modality_name=modality, values=pd.Series(['a', 'b', 'c']))
        .append_values(modality_name=modality, values=pd.Series(['a', 'b', 'd', 'i']))
    )

    # Without an explicit action the unknown value must be reported
    with self.assertRaises(IndexError):
        generator.get(modality_name=modality)

    dictionary = generator.get(modality_name=modality, action_on_missing='silent')
    expected_labels = np.arange(5)
    expected_names = np.array(['a', 'b', 'c', 'd', 'e'])
    npt.assert_equal(dictionary['label'], expected_labels)
    npt.assert_equal(dictionary['name'], expected_names)
def getBipolar(self, content):
    """
    Build a `Bipolar` modality around `content`, using a bipolar dictionary
    generated from that same content and empty dataset/experiment/modality
    configurations.
    """
    generator = Dictionary_Generator()
    generator.append_values(modality_name='test', values=content)

    # The instance is discarded; presumably constructing it installs the
    # mocked configuration - TODO confirm against ConfigMock.
    ConfigMock()

    bipolar_kwargs = {
        'dataset_name': 'test_ds',
        'dataset_cfgs': {},
        'experiment_name': 'test_exp',
        'experiment_cfgs': {},
        'modality_name': 'test_modality',
        'content': content,
        'modality_cfgs': {},
        'dictionary': generator.get_bipolar_dictionary('test'),
    }
    return Bipolar(**bipolar_kwargs)
def getMultiBipolar(self, content):
    """
    Build a `Multi_Bipolar` modality around `content`, feeding every column
    of `content` into one shared bipolar dictionary.
    """
    generator = Dictionary_Generator()
    # Every column contributes its values to the same 'test' dictionary
    for column_name in content:
        column_values = content[column_name]
        generator.append_values(modality_name='test', values=column_values)

    # The instance is discarded; presumably constructing it installs the
    # mocked configuration - TODO confirm against ConfigMock.
    ConfigMock()

    modality_settings = {
        'columns': content.columns.to_list(),
        'to_each_view_its_own_label': False,
        'skip_dictionary_save': True,
    }
    shared_dictionary = generator.get_bipolar_dictionary('test')
    return Multi_Bipolar(
        dataset_name='test_ds',
        dataset_cfgs={},
        experiment_name='test_exp',
        experiment_cfgs={},
        modality_name='test_modality',
        content=content,
        modality_cfgs=modality_settings,
        dictionary=shared_dictionary,
    )
def test_read_suggested(self):
    """
    Suggested dictionaries read for two datasets should merge into a single
    dictionary holding the union of their names ('a' to 'e').
    """
    modality = 'test_suggested'
    suggested_names = {
        'a': ['a', 'b', 'e'],
        'b': ['a', 'b', 'c', 'd'],
    }
    # One fake dictionary file per dataset, keyed by the name the mock expects
    fake_files = {
        FileManagerMock.get_dictionary_name(
            dataset_name=dataset,
            modality_name=modality,
        ): build_empty_dictionary(names)
        for dataset, names in suggested_names.items()
    }
    file_manager = FileManagerMock(fake_values=fake_files)

    generator = Dictionary_Generator()
    (
        generator
        .append_suggested_dictionary(dataset_name='a', modality_name=modality, FMSingleton=file_manager)
        .append_suggested_dictionary(dataset_name='b', modality_name=modality, FMSingleton=file_manager)
    )

    merged = generator.get_merged_suggested_dictionary(modality_name=modality)

    expected_labels = np.arange(5)
    expected_names = np.array(['a', 'b', 'c', 'd', 'e'])
    npt.assert_equal(merged['label'], expected_labels)
    npt.assert_equal(merged['name'], expected_names)
def collect_dictionaries(self):
    """
    Check all the Datasets for common items, e.g. body part and then create
    a general dictionary for all of them.

    The datasets are taken from the tasks of every scene in
    `self.scenario_cfgs`. Every modality of type 'one_vs_rest', 'bipolar' or
    'multi_bipolar' found in any experiment of those datasets is a
    dictionary candidate. For each candidate, the suggested dictionary of
    every dataset and the annotation values of the configured column(s) are
    appended to the `Dictionary_Generator`.

    Raises:
        Exception: if the experiments/modalities of a dataset config can't
            be traversed (the original error is chained as the cause).
        ValueError: if the annotations of an experiment can't be read.
        KeyError: if `column_name` points at a column that isn't available.
        IndexError: if one of the `columns` is missing from the annotations,
            or if the modality defines neither `column_name` nor `columns`.
        AssertionError: if a dict-style column definition lacks `csv_name`.
    """
    # Unique dataset names in the order they first appear in the scenario
    datasets = []
    for scene in self.scenario_cfgs['scenes']:
        for task in scene['tasks'].values():
            if task['dataset_name'] not in datasets:
                datasets.append(task['dataset_name'])

    configs = {}
    for dataset_name in datasets:
        configs[dataset_name] = File_Manager().read_dataset_config(dataset_name)

    modalities_with_dictionaries = [
        'one_vs_rest',
        'bipolar',
        'multi_bipolar',
    ]  # TODO: add 'hierarchical_label' but this has some fancy logic :-S

    dictionary_candidates = []
    for dataset_name in datasets:
        config = configs[dataset_name]
        try:
            for experiment in config['experiments'].values():
                # Modalities may be a string reference (e.g.
                # 'same_as_train_set'); only real definitions are inspected.
                if not isinstance(experiment['modalities'], dict):
                    continue
                for name, cfg in experiment['modalities'].items():
                    if (cfg['type'].lower() in modalities_with_dictionaries
                            and name not in dictionary_candidates):
                        dictionary_candidates.append(name)
        except Exception as e:
            # Chain the cause so the original traceback isn't lost
            raise Exception(
                f'Failed to get dictionary for {dataset_name}: {e}') from e

    # Store all the different values available for this modality into the dictionary singleton that
    # keeps track of the unique values
    dg = Dictionary_Generator()
    for modality_name in dictionary_candidates:
        for dataset_name in datasets:
            dg.append_suggested_dictionary(dataset_name=dataset_name, modality_name=modality_name)
            config = configs[dataset_name]
            for experiment in config['experiments'].values():
                annotations = File_Manager().read_csv_annotations(
                    dataset_name,
                    annotations_rel_path=experiment['annotations_path'],
                    # Multi-view argument should be irrelevant for this
                )
                if annotations is None:
                    raise ValueError(
                        f'Could not find the dataset: {dataset_name} in {experiment["annotations_path"]}'
                    )

                modalities = experiment['modalities']
                if modalities == 'same_as_train_set':
                    modalities = config['experiments']['train_set']['modalities']

                if modality_name not in modalities:
                    continue

                modality_cfg = modalities[modality_name]
                if 'column_name' in modality_cfg:
                    try:
                        colname = modality_cfg['column_name']
                        dg.append_values(modality_name=modality_name,
                                         values=annotations[colname])
                    except KeyError:
                        Console_UI().warn_user(
                            f'Got a key annotation exception for {colname}'
                        )
                        Console_UI().warn_user(modality_cfg)
                        Console_UI().warn_user(annotations.columns)
                        raise
                    except Exception:
                        Console_UI().warn_user(
                            f'Got an annotation exception for {colname}'
                        )
                        Console_UI().warn_user(modality_cfg)
                        Console_UI().warn_user(annotations)
                        raise
                elif 'columns' in modality_cfg:
                    for column_name in modality_cfg['columns']:
                        # A column may be given as a dict, in which case the
                        # csv column is found under 'csv_name'
                        if isinstance(column_name, dict):
                            assert 'csv_name' in column_name, \
                                f'The column doesn\'t have the expected csv_name element, got: {column_name}'
                            column_name = column_name['csv_name']

                        if column_name not in annotations:
                            # Suggest more alternatives for wide annotation files
                            n = 3 if len(annotations.columns) < 10 else ceil(
                                len(annotations.columns) / 3)
                            closest = get_close_matches(
                                word=column_name,
                                possibilities=annotations.columns,
                                n=n,
                            )
                            closest = ', '.join(closest)
                            raise IndexError(
                                f'The {column_name} from {modality_name} doesn\'t exist.' +
                                f' Closest matching are: {closest}')

                        dg.append_values(modality_name=modality_name,
                                         values=annotations[column_name])
                else:
                    raise IndexError(
                        f'Expected {modality_name} to have either columns or column_name defined'
                    )
def get_modality_and_content(
    annotations,
    modality_name: str,
    modality_cfgs: dict,
    ignore_index: int,
):
    """
    Resolve the modality class, its content and its dictionary from the
    modality configuration.

    The lookup is driven by the case-insensitive `modality_cfgs['type']`:

    * 'Multi_Bipolar' and 'Multi_Coordinate' select several annotation
      columns; 'Multi_Coordinate' has no dictionary (None).
    * 'Implicit' and 'Style' have no content (None) and pick the class from
      `modality_cfgs['consistency']` ('Number', '1D', '2D' or '3D').
    * 'ID_from_Indices' gets a series filled with `ignore_index` that shares
      the index of `annotations`.
    * 'Pseudo_Label' has no content (None).
    * All remaining types read the column `modality_cfgs['column_name']`.

    Args:
        annotations: the annotations that the columns are picked from.
            NOTE(review): indexed like a pandas DataFrame - confirm.
        modality_name: name used for the dictionary lookup and in messages.
        modality_cfgs: the configuration of the modality.
        ignore_index: fill value for the 'ID_from_Indices' content.

    Returns:
        A tuple `(modality class, content, dictionary)`.

    Raises:
        AssertionError: if a config key required by the type is missing or a
            coordinate column isn't among the annotation columns.
        BaseException: for an unknown consistency, column or type.
    """
    content = None
    dg = Dictionary_Generator()
    # Normalise once instead of lower-casing at every comparison
    modality_type = modality_cfgs['type'].lower()

    if modality_type == 'multi_bipolar':
        assert 'columns' in modality_cfgs, \
            f'modality_cfgs {modality_name}: {yamlDump(modality_cfgs)} must contain "columns"'
        columns = Multi_Bipolar.get_csv_column_names(column_defintions=modality_cfgs['columns'],
                                                     modality_name=modality_name)
        return Multi_Bipolar, annotations[columns], dg.get_bipolar_dictionary(
            modality_name=modality_name,
            label_dictionary=modality_cfgs.get('dictionary'),
        )

    if modality_type == 'multi_coordinate':
        assert 'column_prefixes' in modality_cfgs
        # Each prefix stands for an x/y pair of columns
        columns = []
        for prefix in modality_cfgs['column_prefixes']:
            columns.extend((f'{prefix}_x', f'{prefix}_y'))

        for colname in columns:
            assert colname in annotations.columns, \
                f'The {colname} doesn\'t exist among columns in annotation'

        return Multi_Coordinate, annotations[columns], None

    dictionary = dg.get(modality_name)

    if modality_type == 'implicit':
        assert 'consistency' in modality_cfgs, \
            'modality_cfgs %s:%s must contain "consistency"' % (
                modality_name,
                yamlDump(modality_cfgs)
            )
        consistency = modality_cfgs['consistency'].lower()
        if consistency == 'number':
            return Implicit_Number, content, dictionary
        if consistency == '1d':
            return Implicit_Sequence, content, dictionary
        if consistency == '2d':
            return Implicit_Plane, content, dictionary
        if consistency == '3d':
            return Implicit_Volume, content, dictionary
        raise BaseException('Unknown consistency: "%s"' % (modality_cfgs['consistency']))

    if modality_type == 'style':
        consistency = modality_cfgs['consistency'].lower()
        if consistency == 'number':
            return Style_Number, content, dictionary
        if consistency == '1d':
            return Style_Sequence, content, dictionary
        if consistency == '2d':
            return Style_Plane, content, dictionary
        if consistency == '3d':
            return Style_Volume, content, dictionary
        # Fail like the 'Implicit' branch instead of falling off the end of
        # the function and handing None to a caller that expects a tuple
        raise BaseException('Unknown consistency: "%s"' % (modality_cfgs['consistency']))

    if modality_type == 'id_from_indices':
        return ID_from_Indices, pd.Series(ignore_index, index=annotations.index, dtype='int64'), dictionary

    if modality_type == 'pseudo_label':
        return Pseudo_Label, content, dictionary

    # Everything below is backed by a single annotation column
    assert 'column_name' in modality_cfgs, \
        'modality_cfgs %s:\n%s\n must contain "column_name"' % (
            modality_name,
            yamlDump(modality_cfgs)
        )

    colname = modality_cfgs['column_name']
    if colname not in annotations:
        available_cols = ", ".join(str(i) for i in annotations)
        raise BaseException('Unknown column "%s" - in the dataset with cols: %s' % (colname, available_cols))

    content = annotations[colname]
    if modality_type == 'one_vs_rest':
        return One_vs_Rest, content, dictionary

    if modality_type == 'bipolar':
        return Bipolar, content, dg.get_bipolar_dictionary(
            modality_name=modality_name,
            label_dictionary=modality_cfgs.get('dictionary'),
        )

    if modality_type == 'char_sequence':
        return Char_Sequence, content, dictionary

    if modality_type == 'image_from_filename':
        return Image_from_Filename, content, dictionary

    if modality_type == 'hierarchical_label':
        return Hierarchical_Label, content, dictionary

    raise BaseException('Unknown column type: "%s"' % (modality_cfgs['type']))