def test_load_single_item_1() -> None:
    """
    Load one subject whose rows are spread over two image channels and a label channel,
    with some scalar and categorical cells left empty in the CSV.
    Checks the order of the image files, the label, the positions of the numerical values
    (including a NaN entry), the categorical encoding (including NaN entries), and that
    loading without any numerical columns gives an empty numerical feature tensor.
    """
    csv_text = StringIO("""subject,channel,path,value,scalar1,scalar2,categorical1,categorical2
S1,image1,foo1.nii,,2.1,2.2,True,False
S1,image2,foo2.nii,,3.1,,True,False
S1,label,,True,1.1,1.2,,False
""")
    frame = pd.read_csv(csv_text, sep=",", dtype=str)
    # Columns are deliberately listed in a different order than in the CSV header.
    numeric_cols = ["scalar2", "scalar1"]
    categorical_cols = ["categorical1", "categorical2"]
    feature_channels = _get_non_image_dict(["label", "image2"],
                                           ["scalar2", "scalar1"],
                                           ["categorical1", "categorical2"])
    one_hot_encoder = CategoricalToOneHotEncoder.create_from_dataframe(dataframe=frame,
                                                                       columns=categorical_cols)
    item: ScalarDataSource = load_single_data_source(
        frame,
        subject_id="S1",
        # Image channels are requested in the opposite order to the CSV rows.
        image_channels=["image2", "image1"],
        image_file_column="path",
        label_channels=["label"],
        label_value_column="value",
        non_image_feature_channels=feature_channels,
        numerical_columns=numeric_cols,
        categorical_data_encoder=one_hot_encoder,
        channel_column="channel")

    # Files must come back in the requested channel order, not in file order.
    for position, expected_file in enumerate(("foo2.nii", "foo1.nii")):
        assert item.channel_files[position] == expected_file

    assert item.label == torch.tensor([1.0])
    assert item.label.dtype == torch.float32

    # Expected numerical values at the positions where the CSV provides a value.
    for position, expected_value in ((0, 1.2), (2, 1.1), (3, 3.1)):
        assert item.numerical_non_image_features[position] == expected_value
    # The remaining position corresponds to an empty CSV cell and must be NaN.
    assert math.isnan(item.numerical_non_image_features[1].item())

    categorical = item.categorical_non_image_features
    assert np.all(np.isnan(categorical[0].numpy()))
    assert categorical[1:].tolist() == [1.0, 1.0, 1.0]
    assert item.numerical_non_image_features.dtype == torch.float32

    # Same subject again, but without any non-image features requested.
    item_no_scalars: ScalarDataSource = load_single_data_source(
        frame,
        subject_id="S1",
        # Image channels are requested in the opposite order to the CSV rows.
        image_channels=["image2", "image1"],
        image_file_column="path",
        label_channels=["label"],
        label_value_column="value",
        non_image_feature_channels={},
        numerical_columns=[],
        channel_column="channel")
    assert item_no_scalars.numerical_non_image_features.shape == (0, )
def test_load_single_item_6() -> None:
    """
    Categorical features can be read from different channels per feature: here "cat1" is
    taken from week1 and week2, while "cat2" is taken from week3 only.
    """
    csv_text = StringIO("""subject,path,channel,cat1,cat2,scalar1,label
S1,foo1.nii,week1,True,True,1.2,True
S1,foo2.nii,week2,False,False,1.2,True
S1,foo2.nii,week3,False,True,1.3,True
""")
    frame = pd.read_csv(csv_text, sep=",", dtype=str)
    one_hot_encoder = CategoricalToOneHotEncoder.create_from_dataframe(
        dataframe=frame,
        columns=["cat1", "cat2"]
    )
    # Per-feature channel selection; each feature uses its own list of channels.
    channels_per_feature = {
        "scalar1": ["week3"],
        "cat1": ["week1", "week2"],
        "cat2": ["week3"],
    }
    item: ScalarDataSource = load_single_data_source(
        frame,
        subject_id="S1",
        image_channels=["week1"],
        image_file_column="path",
        label_channels=["week1"],
        label_value_column="label",
        numerical_columns=["scalar1"],
        non_image_feature_channels=channels_per_feature,
        categorical_data_encoder=one_hot_encoder,
        channel_column="channel")
    expected_encoding = torch.tensor([0, 1, 1, 0, 0, 1])
    matches = item.categorical_non_image_features == expected_encoding
    assert torch.all(matches)
def test_load_single_item_5() -> None:
    """
    Numerical features can be read from different channels per feature: "scalar1" is taken
    from week1 and week2, "scalar2" from week1 only. The expected feature vector follows the
    order of the requested numerical columns (scalar2 first, then scalar1).
    """
    csv_text = StringIO("""subject,path,channel,scalar1,scalar2,label
S1,foo1.nii,week1,2.1,2.2,True
S1,foo2.nii,week2,2.3,2.2,True
""")
    frame = pd.read_csv(csv_text, sep=",", dtype=str)
    channels_per_feature = {
        "scalar1": ["week1", "week2"],
        "scalar2": ["week1"],
    }
    item: ScalarDataSource = load_single_data_source(
        frame,
        subject_id="S1",
        image_channels=["week1"],
        image_file_column="path",
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=channels_per_feature,
        numerical_columns=["scalar2", "scalar1"],
        channel_column="channel")

    assert item.channel_files[0] == "foo1.nii"
    assert item.label == torch.tensor([1.0])
    assert item.label.dtype == torch.float32

    expected_features = torch.tensor([2.2, 2.1, 2.3])
    assert torch.all(item.numerical_non_image_features == expected_features)
    assert item.numerical_non_image_features.dtype == torch.float32
def load_item(csv_string: StringIO) -> str:
    """
    Parse the given CSV text and try to load the data source for subject "S1", expecting
    the load to raise.

    :param csv_string: CSV content to read into a dataframe (all columns read as strings).
    :return: The string form of the pytest exception info captured for the raised error.
        pytest.raises makes the surrounding test fail if no exception is raised.
    """
    frame = pd.read_csv(csv_string, sep=",", dtype=str)
    numeric_cols = ["scalar2", "scalar1"]
    feature_channels = _get_non_image_dict(["label", "image2"],
                                           ["scalar2", "scalar1"])
    # Channels and columns are deliberately given in a different order than in the file.
    load_kwargs = dict(
        subject_id="S1",
        image_channels=["image2", "image1"],
        image_file_column="path",
        label_channels=["label"],
        label_value_column="value",
        non_image_feature_channels=feature_channels,
        numerical_columns=numeric_cols,
        channel_column="channel",
    )
    with pytest.raises(Exception) as ex:
        load_single_data_source(frame, **load_kwargs)
    return str(ex)
def test_load_single_item_7() -> None:
    """
    Categorical features read from different channels per feature, where the data to encode
    contains a value ("houhou") that was not present when the encoder was fitted, and an
    empty cell. Both cases are expected to be encoded as NaN.
    """
    # The encoder is created from a dataframe that only has valid values in cat1.
    fit_frame = pd.read_csv(
        StringIO("""subject,path,channel,cat1,cat2,label
S1,foo1.nii,week1,True,True,True
S1,foo2.nii,week2,False,False,True
S1,foo2.nii,week3,False,,True
"""),
        sep=",",
        dtype=str)
    encoder = CategoricalToOneHotEncoder.create_from_dataframe(dataframe=fit_frame,
                                                               columns=["cat1", "cat2"])

    # The dataframe to load has an unknown cat1 value in the week2 row.
    invalid_frame = pd.read_csv(
        StringIO("""subject,path,channel,cat1,cat2,label
S1,foo1.nii,week1,True,True,True
S1,foo2.nii,week2,houhou,False,False
S1,foo2.nii,week3,False,,True
"""),
        sep=",",
        dtype=str)
    channels_per_feature = {
        "cat1": ["week1", "week2"],
        "cat2": ["week3"],
    }
    item: ScalarDataSource = load_single_data_source(
        invalid_frame,
        subject_id="S1",
        image_channels=["week1"],
        image_file_column="path",
        label_channels=["week1"],
        label_value_column="label",
        non_image_feature_channels=channels_per_feature,
        categorical_data_encoder=encoder,
        channel_column="channel")

    encoded = item.categorical_non_image_features
    # cat1 at week1 holds a valid value.
    assert torch.all(encoded[0:2] == torch.tensor([0, 1]))
    # cat1 at week2 holds the unknown value (regression check).
    assert torch.all(torch.isnan(encoded[2:4]))
    # cat2 at week3 is an empty cell.
    assert torch.all(torch.isnan(encoded[4:6]))
def _test_load_labels(label_channels: List[str],
                      transform_labels: Union[Callable, List[Callable]]) -> ScalarDataSource:
    """
    Helper that loads subject "S1" from a fixed two-row CSV (channels label_w1 and label_w2)
    as a non-classification dataset, using the given label channels and label transforms.

    :param label_channels: Channels to read the label value from.
    :param transform_labels: Transform, or list of transforms, forwarded to the loader.
    :return: The data source returned by load_single_data_source.
    """
    csv_text = StringIO("""subject,channel,path,value,scalar1,scalar2
S1,label_w1,,1,1.1,1.2
S1,label_w2,,3,,
""")
    frame = pd.read_csv(csv_text, sep=",", dtype=str)
    # Numerical columns are deliberately listed in a different order than in the file.
    numeric_cols = ["scalar2", "scalar1"]
    feature_channels = _get_non_image_dict(["label_w1"], ["scalar2", "scalar1"])
    data_source = load_single_data_source(
        frame,
        subject_id="S1",
        channel_column="channel",
        label_channels=label_channels,
        label_value_column="value",
        transform_labels=transform_labels,
        non_image_feature_channels=feature_channels,
        numerical_columns=numeric_cols,
        is_classification_dataset=False)
    return data_source
def test_load_single_item_3() -> None:
    """
    Load a subject that has a single row of data and no channel column in the CSV:
    no image or label channels are specified, and the channel column name given to the
    loader ("foo") does not exist in the file.
    """
    csv_text = StringIO("""subject,path,value,scalar1,scalar2,label
S1,foo1.nii,,2.1,2.2,True
""")
    frame = pd.read_csv(csv_text, sep=",", dtype=str)
    item: ScalarDataSource = load_single_data_source(
        frame,
        subject_id="S1",
        image_channels=[],
        image_file_column="path",
        label_channels=None,
        label_value_column="label",
        non_image_feature_channels={},
        numerical_columns=["scalar2", "scalar1"],
        channel_column="foo")

    assert item.channel_files[0] == "foo1.nii"
    assert item.label == torch.tensor([1.0])
    assert item.label.dtype == torch.float32

    numerical = item.numerical_non_image_features
    # Values follow the order of the requested numerical columns: scalar2, then scalar1.
    assert numerical.tolist() == pytest.approx([2.2, 2.1])
    assert numerical.dtype == torch.float32