def sidecar_flatten(json_sidecar):
    """Flatten a JSON sidecar into tab-separated text.

    The sidecar is serialized to a JSON string, reloaded as a plain
    dictionary, flattened with ``SidecarMap.flatten`` and the resulting
    dataframe is written out as TSV text (with a header row, no index).

    Parameters
    ----------
    json_sidecar: Sidecar
        Previously created Sidecar

    Returns
    -------
    dict
        A dictionary holding the command, the flattened TSV text, the output
        file name, and a success message.

    """
    # Round-trip through JSON so the flattener works on plain dictionaries.
    sidecar_dict = json.loads(json_sidecar.get_as_json_string())
    flattened = SidecarMap().flatten(sidecar_dict)
    # Passing None as the path makes to_csv return the text instead of writing a file.
    tsv_text = flattened.to_csv(None, sep='\t', index=False, header=True)
    display_name = json_sidecar.name
    file_name = generate_filename(display_name, name_suffix='flattened', extension='.tsv')
    results = {
        base_constants.COMMAND: base_constants.COMMAND_FLATTEN,
        'data': tsv_text,
        'output_display_name': file_name,
        'msg_category': 'success',
        'msg': f'JSON sidecar {display_name} was successfully flattened',
    }
    return results
def test_get_key_value(self):
    """get_key_value returns a dict whose HED entry reflects whether column values were given."""
    # Empty column values, default (categorical) handling
    dict_values = SidecarMap.get_key_value('', [])
    self.assertIsInstance(dict_values, dict, "get_key_value should return dict when column_values empty")
    self.assertFalse(dict_values["HED"], "get_key_value HED key should be empty when column_values empty")

    # Empty column values, explicitly non-categorical
    dict_values = SidecarMap.get_key_value('', [], categorical=False)
    self.assertIsInstance(dict_values, dict, "get_key_value should return dict when column_values empty")
    self.assertFalse(dict_values["HED"], "get_key_value HED key should be empty when column_values empty")

    # Non-empty column values must produce a non-empty HED entry
    dict_values = SidecarMap.get_key_value('blech', {'a': 3, 'b': 2, 'c': 1})
    self.assertIsInstance(dict_values, dict, "get_key_value should return dict when column_values not empty")
    self.assertTrue(dict_values["HED"],
                    "get_key_value HED key should not be empty when column_values not empty")
def test_flatten_with_array(self):
    """flatten must raise HedFileError when the sidecar contains a JSON array."""
    sr = SidecarMap()
    sidecar1 = {"a": {"c": ["blech3", "blech3a"], "d": "blech4", "e": "blech5"}, "b": "blech2"}
    # assertRaises fails the test if nothing is raised and lets any other
    # exception type propagate with its full traceback.
    with self.assertRaises(HedFileError,
                           msg='process should have thrown a HedFileError when array included in JSON'):
        sr.flatten(sidecar1)
def test_flatten_col(self):
    """flatten_col returns parallel key/value lists, with or without the appended header."""
    mapper = SidecarMap()
    column_dict1 = {"a": "blech1", "b": "blech2"}
    num_entries = len(column_dict1.keys())

    # Default call: two extra entries beyond the dictionary items
    keys_full, values_full = mapper.flatten_col("tell", column_dict1)
    self.assertEqual(len(keys_full), num_entries + 2,
                     "flatten_col simple dictionary append should have a header key and footer key")
    self.assertEqual(len(keys_full), len(values_full),
                     "flatten_col column keys and values should be of the same length")
    self.assertEqual(values_full[0], 'n/a', "flatten_col header key values should be n/a")

    # append_header=False: only one extra entry
    keys_short, values_short = mapper.flatten_col("tell", column_dict1, append_header=False)
    self.assertEqual(len(keys_short), num_entries + 1,
                     "flatten_col simple dictionary no append should have a header key and no footer key")
    self.assertEqual(len(keys_short), len(values_short),
                     "flatten_col column keys and values should be of the same length")
    self.assertEqual(values_short[0], 'n/a', "flatten_col header key values should be n/a")
def test_flatten_sidecar_with_empty(self):
    """flatten keeps the same shape when a sidecar value is an empty string."""
    mapper = SidecarMap()
    sidecars = (
        # top level value is empty
        {"a": {"c": "blech3", "d": "blech4", "e": "blech5"}, "b": "",
         "af": {"c": {"af": "blech6"}, "df_new": "blech7"}},
        # other value is empty
        {"a": {"c": "blech3", "d": "blech4", "e": "blech5"}, "b": "blech2",
         "af": {"c": {"af": "blech6"}, "df_new": ""}},
    )
    for sidecar in sidecars:
        flattened = mapper.flatten(sidecar)
        self.assertEqual(12, len(flattened), "flatten should return a dataframe with correct number of rows")
        self.assertEqual(2, len(flattened.columns), "flatten should return a dataframe with 2 columns")
def test_flatten_hed_column_names(self):
    """flatten_hed honors an optional list of column names to include."""
    sr = SidecarMap()
    # Three categorical columns, each with two HED entries
    sidecar1 = {"a1_col": {"HED": {"b1": "Label/B1", "c1": "Label/C1"}},
                "a2_col": {"HED": {"b2": "Label/B2", "c2": "Label/C2"}},
                "a3_col": {"HED": {"b3": "Label/B2", "c3": "Label/C2"}}}

    # No column list: every column contributes its entries
    df1 = sr.flatten_hed(sidecar1)
    self.assertEqual(len(df1), 9, "When all columns are used should have all entries")
    self.assertEqual(len(df1.columns), 2, "flatten_hed dataframe should have 2 columns")
    self.assertEqual(df1.iloc[1]['column'], 'b1', "flatten_hed dataframe should have right value in key")

    # Explicit column list: only the named columns contribute
    df2 = sr.flatten_hed(sidecar1, ["a1_col", "a3_col"])
    self.assertEqual(len(df2), 6, "When some columns are used should have appropriate entries")
    self.assertEqual(len(df2.columns), 2, "flatten_hed dataframe should have 2 columns")
    self.assertEqual(df2.iloc[1]['column'], 'b1', "flatten_hed dataframe should have right value in key")
def test_unflatten(self):
    """unflatten reverses flatten for sidecars of increasing nesting depth."""
    mapper = SidecarMap()
    cases = [
        ({"a": "blech1", "d": "blech4", "e": "blech5"},
         "unflatten should unflatten when sidecar unnested"),
        ({"a": {"c1": "blech3", "c2": "blech3a"}, "d": "apple"},
         "unflatten should unflatten when sidecar has single dictionary"),
        ({"b": "banana", "a": {"c1": "blech3", "c2": "blech3a"}, "d": "apple"},
         "unflatten should unflatten when sidecar has embedded dictionary"),
        ({"a": {"c": {"c1": "blech3", "c2": "blech3a"}, "d": "blech4", "e": "blech5"}, "b": "blech2"},
         "unflatten should unflatten when sidecar has nested dictionaries"),
    ]
    for original, message in cases:
        # Round trip: flatten to a dataframe, then rebuild the dictionary
        round_trip = mapper.unflatten(mapper.flatten(original))
        self.assertEqual(original, round_trip, message)
def extract(events, columns_selected):
    """Build a JSON sidecar template from the columns of an events file.

    Parameters
    ----------
    events: EventInput
        An events input object
    columns_selected: dict
        dictionary of columns selected

    Returns
    -------
    dict
        On success, a dictionary whose 'data' entry is the extracted sidecar
        as indented JSON text. If issues were reported, a dictionary whose
        'data' entry is the printable issue string and whose message
        category is 'warning'.

    """
    columns_info = get_columns_info(events.dataframe)
    hed_dict, issues = SidecarMap().get_sidecar_dict(columns_info, columns_selected)
    display_name = events.name

    if not issues:
        file_name = generate_filename(display_name, name_suffix='_extracted', extension='.json')
        return {
            base_constants.COMMAND: base_constants.COMMAND_EXTRACT,
            'data': json.dumps(hed_dict, indent=4),
            'output_display_name': file_name,
            'msg_category': 'success',
            'msg': 'Events extraction to JSON complete',
        }

    # Issues were reported: return them as a downloadable text report.
    issue_str = get_printable_issue_string(issues, f"{display_name} HED validation errors")
    file_name = generate_filename(display_name, name_suffix='_errors', extension='.txt')
    # NOTE(review): this branch reports COMMAND_VALIDATE rather than
    # COMMAND_EXTRACT -- confirm this is what callers expect.
    return {
        base_constants.COMMAND: base_constants.COMMAND_VALIDATE,
        'data': issue_str,
        "output_display_name": file_name,
        "msg_category": "warning",
        'msg': f"Events file {display_name} had extraction errors",
    }
def test_flatten_col_dict(self):
    """flatten_col_dict returns matching key and value lists for nested dictionaries."""
    mapper = SidecarMap()

    # Test 1 level of dictionary
    flat_dict = {"a": "blech1", "b": "blech2"}
    flat_keys, flat_values = mapper.flatten_col_dict(flat_dict)
    self.assertEqual(2, len(flat_keys), "flatten_col_dict should return keys for each element dictionary")
    self.assertEqual(2, len(flat_values), "flatten_col_dict should return values for each element dictionary")

    # Test 2 levels of dictionary
    nested_dict = {"a": {"c": "blech3", "d": "blech4", "e": "blech5"}, "b": "blech2"}
    nested_keys, nested_values = mapper.flatten_col_dict(nested_dict)
    self.assertEqual(6, len(nested_keys), "flatten_col_dict should return keys for each element + 2 for header")
    self.assertEqual(6, len(nested_values),
                     "flatten_col_dict should return values for each element + 2 for header")

    # Several nested dictionaries, some nested two levels deep
    deep_dict = {"a": {"c": "blech3", "d": "blech4", "e": "blech5"},
                 "b": "blech2",
                 "af": {"c": {"af": "blech6"}, "df_new": "blech7"},
                 "eg": {"ef": "blech8", "eh": {"eg": "blech9"}}}
    deep_keys, deep_values = mapper.flatten_col_dict(deep_dict)
    self.assertEqual(18, len(deep_keys),
                     "flatten_col_dict should return keys for each element + 2 for each header")
    self.assertEqual(18, len(deep_values),
                     "flatten_col_dict should return values for each element + 2 for each header")
def test_get_sidecar_dict(self):
    """get_sidecar_dict reports issues for missing or unknown column selections."""
    columns_info = {'column1': {'a': 3, 'b': 2, 'c': 1},
                    'column2': {'a1': 6, 'b1': 22}}

    # Nothing selected
    result, problems = SidecarMap.get_sidecar_dict(columns_info, {})
    self.assertFalse(result, "Dictionary is empty of no columns selected")
    self.assertTrue(problems, "Issues is not empty if no columns selected")

    # Only an unknown column selected
    result, problems = SidecarMap.get_sidecar_dict(columns_info, {'banana': False})
    self.assertFalse(result, " get_sidecar_dict: Dictionary is empty of bad column selected")
    self.assertTrue(problems, " get_sidecar_dict: Issues is not empty if bad columns selected")

    # One known column mixed with two unknown ones
    selection = {'column1': True, 'banana': False, 'apple': True}
    result, problems = SidecarMap.get_sidecar_dict(columns_info, selection)
    self.assertTrue(result, " get_sidecar_dict: Dictionary not empty if at least one good column selected")
    self.assertTrue(problems, " get_sidecar_dict: Issues is not empty if at least one bad column selected")
    self.assertEqual(len(problems), 2, "get_sidecar_dict: Same number of issues as bad columns")

    # Only known columns selected
    selection = {'column1': True, 'column2': False}
    result, problems = SidecarMap.get_sidecar_dict(columns_info, selection)
    self.assertTrue(result, "Dictionary not empty if at least one good column selected")
    self.assertFalse(problems, "Issues is empty if good data provided")
def test_get_unmarked_key(self):
    """get_unmarked_key strips level markers and returns None when no key remains."""
    sr = SidecarMap()
    unmarked1 = sr.get_unmarked_key("_*_a_b_*_")
    self.assertEqual(unmarked1, "a_b", "get_unmarked_key returns right level 1 value")
    unmarked2 = sr.get_unmarked_key("__*__a_b__*__")
    self.assertEqual(unmarked2, "a_b", "get_unmarked_key returns right level 2 value")
    # A key without markers is returned as-is
    unmarked3 = sr.get_unmarked_key("a_b")
    self.assertEqual(unmarked3, "a_b", "get_unmarked_key returns right level 0 unmarked value")
    # Markers with nothing between them
    unmarked4 = sr.get_unmarked_key("_*__*_")
    self.assertIsNone(unmarked4, "get_unmarked_key returns None when no key")
    # Unbalanced markers
    unmarked5 = sr.get_unmarked_key("___*__*_")
    self.assertIsNone(unmarked5, "get_unmarked_key returns None when invalid")
def test_flatten_hed(self):
    """flatten_hed produces a two-column dataframe from the HED entries of a sidecar."""
    mapper = SidecarMap()

    # One categorical column
    categorical = {"a_col": {"HED": {"b": "Label/B", "c": "Label/C"}}}
    cat_df = mapper.flatten_hed(categorical)
    self.assertEqual(len(cat_df), 3,
                     "flatten_hed dataframe should have 1 more entry than HED entries for dictionary")
    self.assertEqual(len(cat_df.columns), 2, "flatten_hed dataframe should have 2 columns")
    self.assertEqual(cat_df.iloc[1]['column'], 'b', "flatten_hed dataframe should have right value in key")

    # One value column
    value_only = {"a_col": {"HED": "Label/#"}}
    value_df = mapper.flatten_hed(value_only)
    self.assertEqual(len(value_df), 1, "flatten_hed dataframe should have same number of entries as dictionary")
    self.assertEqual(len(value_df.columns), 2, "flatten_hed dataframe should have 2 columns")
    self.assertEqual(value_df.iloc[0]['column'], '_*_a_col_*_',
                     "flatten_hed dataframe should have right value in key")

    # A combination with other columns
    combined = {"a_col": {"HED": {"b": "Label/B", "c": "Label/C"}, "d": {"a1": "b1"}},
                "b_col": {"HED": "Label/#"}}
    combined_df = mapper.flatten_hed(combined)
    self.assertEqual(len(combined_df), 4,
                     "flatten_hed dataframe should have 1 more entries than HED entries for dictionary")
    self.assertEqual(len(combined_df.columns), 2, "flatten_hed dataframe should have 2 columns")
    self.assertEqual(combined_df.iloc[0]['column'], '_*_a_col_*_',
                     "flatten_hed dataframe should have right value in key")
    self.assertEqual(combined_df.iloc[3]['column'], '_*_b_col_*_',
                     "flatten_hed dataframe should have right value in key")
def test_get_marked_key(self):
    """get_marked_key wraps a key in level-dependent markers."""
    mapper = SidecarMap()
    # (key, level, expected marked key, failure message)
    cases = [
        ("a_b", 1, "_*_a_b_*_", "get_marked_key returns right level 1 value"),
        ("a_b", 2, "__*__a_b__*__", "get_marked_key returns right level 2 value"),
        ("a_b", 0, "a_b", "get_marked_key returns right level 0 value"),
        ("", 1, "_*__*_", "get_marked_key returns right empty key marked value"),
    ]
    for key, level, expected, message in cases:
        self.assertEqual(mapper.get_marked_key(key, level), expected, message)
def test_unflatten_hed(self):
    """unflatten_hed recovers the column keys from a dataframe made by flatten_hed."""
    sr = SidecarMap()

    # One categorical column
    sidecar1 = {"a_col": {"HED": {"b": "Label/B", "c": "Label/C"}}}
    df1 = sr.flatten_hed(sidecar1)
    undf1 = sr.unflatten_hed(df1)
    self.assertEqual(len(undf1.keys()), 1, "unflatten_hed dictionary should unpack correctly")
    # assertIn reports the actual keys on failure, unlike assertTrue(x in y)
    self.assertIn("a_col", undf1.keys(), "The correct key is recovered")

    # One value column
    sidecar2 = {"a_col": {"HED": "Label/#"}}
    df2 = sr.flatten_hed(sidecar2)
    undf2 = sr.unflatten_hed(df2)
    self.assertEqual(len(undf2.keys()), 1, "unflatten_hed dictionary should unpack correctly")
    self.assertIn("a_col", undf2.keys(), "The correct key is recovered")

    # A combination with other columns; c_col has no HED entry, so only two keys are expected back
    sidecar3 = {"a_col": {"HED": {"b": "Label/B", "c": "Label/C"}, "d": {"a1": "b1"}},
                "b_col": {"HED": "Label/#"},
                "c_col": {"levels": "e"}}
    df3 = sr.flatten_hed(sidecar3)
    undf3 = sr.unflatten_hed(df3)
    self.assertEqual(len(undf3.keys()), 2, "unflatten_hed dictionary should unpack correctly")
def test_flatten(self):
    """flatten turns a nested sidecar into a two-column dataframe."""
    nested = {"a": {"c": {"c1": "blech3", "c2": "blech3a"}, "d": "blech4", "e": "blech5"},
              "b": "blech2"}
    flattened = SidecarMap().flatten(nested)
    num_rows = len(flattened)
    self.assertEqual(9, num_rows, "flatten should return a dataframe with correct number of rows")
    num_cols = len(flattened.columns)
    self.assertEqual(2, num_cols, "flatten should return a dataframe with 2 columns")
def test_unflatten_hed_from_file(self):
    """Smoke test: unflatten_hed runs on the flattened sternberg TSV without raising."""
    mapper = SidecarMap()
    # The data file is located relative to this test module
    this_dir = os.path.dirname(os.path.abspath(__file__))
    tsv_path = os.path.join(this_dir, "../data/sternberg/sternberg_flattened.tsv")
    flattened = get_new_dataframe(tsv_path)
    mapper.unflatten_hed(flattened)