def update(self, data):
    """Merge key/target information from *data* into the existing map.

    Args:
        data (DataFrame or str): DataFrame or filename of an events file
            or event map.

    Returns:
        list: Indices of duplicates reported by the internal update.

    Raises:
        HedFileError: If any of the key columns are missing from the data.
    """
    frame = get_new_dataframe(data)
    remove_quotes(frame)
    names = frame.columns.values.tolist()
    _, keys_missing = separate_columns(names, self.key_cols)
    if keys_missing:
        raise HedFileError("MissingKeyColumn",
                           f"make_template data does not have key columns {str(keys_missing)}", "")
    # Build a template frame with the canonical column layout, copying the
    # key columns and whichever target columns the data actually provides.
    template = pd.DataFrame(columns=self.columns)
    template[self.key_cols] = frame[self.key_cols].values
    targets_present, targets_missing = separate_columns(names, self.target_cols)
    if targets_present:
        template[targets_present] = frame[targets_present].values
    if targets_missing:
        template[targets_missing] = 'n/a'
    return self._update(template)
def test_delete_columns(self):
    """delete_columns removes only the listed names that exist in the frame."""
    frame = get_new_dataframe(self.stern_map_path)
    doomed = ['banana', 'event_type', 'letter', 'apple', 'orange']
    self.assertEqual(len(list(frame)), 4, "stern_map should have 4 columns before deletion")
    delete_columns(frame, doomed)
    self.assertEqual(len(list(frame)), 2, "stern_map should have 2 columns after deletion")
def test_get_columns_info(self):
    """get_columns_info yields a dict with one entry per column."""
    frame = get_new_dataframe(self.stern_test2_path)
    info = get_columns_info(frame)
    self.assertIsInstance(info, dict, "get_columns_info should return a dictionary")
    self.assertEqual(
        len(info.keys()), len(frame.columns),
        "get_columns_info should return a dictionary with a key for each column")
def test_get_new_dataframe(self):
    """get_new_dataframe returns an independent DataFrame on every call."""
    df_new = get_new_dataframe(self.stern_map_path)
    self.assertIsInstance(df_new, DataFrame)
    self.assertEqual(
        len(df_new), 87,
        "get_new_dataframe should return correct number of rows")
    self.assertEqual(
        len(df_new.columns), 4,
        "get_new_dataframe should return correct number of columns")
    df_new1 = get_new_dataframe(self.stern_map_path)
    self.assertIsInstance(df_new1, DataFrame)
    self.assertEqual(
        len(df_new1), 87,
        "get_new_dataframe should return correct number of rows")
    self.assertEqual(
        len(df_new1.columns), 4,
        "get_new_dataframe should return correct number of columns")
    # Use .at instead of chained indexing (df.iloc[0]['type']): chained
    # assignment writes to a temporary row copy and may leave the frame
    # unchanged, which would make the independence check vacuous.
    df_new.at[0, 'type'] = 'Pear'
    self.assertNotEqual(df_new.at[0, 'type'], df_new1.at[0, 'type'],
                        "get_new_dataframe returns a new dataframe")
def get_key_counts(root_dir, skip_cols=None):
    """Count unique column values across all _events.tsv files under root_dir.

    Args:
        root_dir (str): Root directory searched for files ending in _events.tsv.
        skip_cols (list or None): Column names to exclude from counting.

    Returns:
        dict: Maps each column name to a dictionary of unique-value counts.
    """
    file_list = get_file_list(root_dir, name_suffix="_events", extensions=[".tsv"])
    count_dicts = {}
    for file in file_list:
        dataframe = get_new_dataframe(file)
        # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
        for col_name, col_values in dataframe.items():
            if skip_cols and col_name in skip_cols:
                continue
            update_dict_counts(count_dicts, col_name, col_values)
    return count_dicts
def test_print(self):
    """ColumnDict.print writes to stdout, which can be captured and discarded."""
    from io import StringIO
    col_dict = ColumnDict()
    col_dict.update(self.stern_map_path)
    frame = get_new_dataframe(self.stern_map_path)
    col_dict.update(self.stern_map_path)
    self.assertEqual(
        len(col_dict.categorical_info.keys()), len(frame.columns),
        "ColumnDict should have all columns as categorical if no value or skip are given")
    # Redirect stdout so the printed report does not pollute test output.
    with mock.patch('sys.stdout', new=StringIO()):
        col_dict.print()
        print("This should be eaten by the StringIO")
def test_update_dict_counts(self):
    """update_dict_counts accumulates per-column counts idempotently in key count."""
    events_path = os.path.abspath(os.path.join(
        self.bids_dir, 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'))
    frame = get_new_dataframe(events_path)
    counts = {}
    update_dict_counts(counts, "onset", frame["onset"])
    self.assertTrue("onset" in counts, "update_dict_counts updates a column counts")
    self.assertEqual(len(counts["onset"]), 551,
                     "update_dict_counts has the right number of counts")
    # A second pass with the same values must not add new keys.
    update_dict_counts(counts, "onset", frame["onset"])
    self.assertEqual(len(counts["onset"]), 551,
                     "update_dict_counts has the right number of counts")
def update(self, data):
    """Merge the columns of *data* into this map, filling absent columns with 'n/a'.

    Args:
        data (str or DataFrame): File name or DataFrame containing event-type data.
    """
    frame = get_new_dataframe(data)
    remove_quotes(frame)
    present, missing = separate_columns(frame.columns.values.tolist(), self.columns)
    # Normalize to the canonical column layout before delegating the update.
    template = pd.DataFrame(columns=self.columns)
    template[present] = frame[present].values
    template[missing] = 'n/a'
    self._update(template)
def test_get_columns_info_skip_columns(self):
    """get_columns_info omits skipped columns; skipping all yields an empty dict."""
    frame = get_new_dataframe(self.stern_test2_path)
    info = get_columns_info(frame, ['latency'])
    self.assertIsInstance(info, dict, "get_columns_info should return a dictionary")
    self.assertEqual(
        len(info.keys()), len(frame.columns) - 1,
        "get_columns_info should return a dictionary with a key for each column included")
    info = get_columns_info(frame, list(frame.columns.values))
    self.assertIsInstance(info, dict, "get_columns_info should return a dictionary")
    self.assertFalse(
        info,
        "get_columns_info should return a dictionary with a key for each column included")
def update(self, data):
    """Tally how many times each unique value appears in each column of data.

    Args:
        data (DataFrame or str): The DataFrame to be analyzed or the full path
            of a tsv file.

    Updates self.value_info and the categorical counts in place; returns None.
    """
    df = get_new_dataframe(data)
    # DataFrame.items() replaces iteritems(), which was removed in pandas 2.0.
    for col_name, col_values in df.items():
        if self.skip_cols and col_name in self.skip_cols:
            continue
        if col_name in self.value_info.keys():
            # Value column: only the total number of entries is tracked.
            self.value_info[col_name] = self.value_info[col_name] + len(col_values)
        else:
            # Categorical column: count occurrences of each unique value.
            col_values = col_values.astype(str)
            values = col_values.value_counts(ascending=True)
            self._update_categorical(col_name, values)
def make_combined_dicts(file_dict, skip_cols=None):
    """Return a combined ColumnDict over all files plus one ColumnDict per file.

    Args:
        file_dict (dict): Dictionary mapping file-name keys to full file paths.
        skip_cols (list or None): Names of columns to skip when counting.

    Returns:
        ColumnDict: Combined column information accumulated across all files.
        dict: Dictionary mapping each key in file_dict to that file's ColumnDict.
    """
    dicts_all = ColumnDict(skip_cols=skip_cols)
    dicts = {}
    for key, file in file_dict.items():
        orig_dict = ColumnDict(skip_cols=skip_cols)
        df = get_new_dataframe(file)
        orig_dict.update(df)
        dicts[key] = orig_dict
        # Fold this file's counts into the running combined dictionary.
        dicts_all.update_dict(orig_dict)
    return dicts_all, dicts
def test_reorder_columns(self):
    """reorder_columns keeps only requested columns, ignores unknown names,
    and leaves the original frame untouched."""
    df = get_new_dataframe(self.stern_map_path)
    df_new = reorder_columns(df, ['event_type', 'type'])
    self.assertEqual(
        len(df_new), 87,
        "reorder_columns should return correct number of rows")
    self.assertEqual(
        len(df_new.columns), 2,
        "reorder_columns should return correct number of columns")
    # The input frame must not be modified in place.
    self.assertEqual(
        len(df), 87,
        "reorder_columns should return correct number of rows")
    self.assertEqual(
        len(df.columns), 4,
        "reorder_columns should not change the original number of columns")
    # Unknown column names ('baloney') are silently dropped.
    df_new1 = reorder_columns(df, ['event_type', 'type', 'baloney'])
    self.assertEqual(
        len(df_new1), 87,
        "reorder_columns should return correct number of rows")
    self.assertEqual(
        len(df_new1.columns), 2,
        "reorder_columns should ignore column names not in the dataframe")
def remap(self, data):
    """Fill the target columns of *data* using this map's key columns.

    Args:
        data (DataFrame or str): Data whose columns are to be remapped.

    Returns:
        DataFrame: New dataframe with columns remapped.
        list: Row numbers that had no correspondence in the mapping.

    Raises:
        HedFileError: If any key column is missing from the data.
    """
    frame = get_new_dataframe(data)
    remove_quotes(frame)
    _, missing_keys = separate_columns(frame.columns.values.tolist(), self.key_cols)
    if missing_keys:
        raise HedFileError("MissingKeys", f"File must have key columns {str(self.key_cols)}", "")
    # Reset all target columns before filling them from the key map.
    frame[self.target_cols] = 'n/a'
    missing_indices = self._remap(frame)
    return frame, missing_indices
def test_remove_quotes(self):
    """After remove_quotes, the quoted file matches its unquoted counterpart."""
    quoted = get_new_dataframe(self.stern_test2_path)
    remove_quotes(quoted)
    unquoted = get_new_dataframe(self.stern_test3_path)
    self.assertEqual(quoted.loc[0, 'stimulus'], unquoted.loc[0, 'stimulus'],
                     "remove_quotes should have quotes removed")
def test_unflatten_hed_from_file(self):
    """unflatten_hed runs without error on the flattened sternberg file."""
    sidecar = SidecarMap()
    flat_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                             "../data/sternberg/sternberg_flattened.tsv")
    frame = get_new_dataframe(flat_path)
    sidecar.unflatten_hed(frame)
def set_contents(self):
    # Load the file at self.file_path into a DataFrame and cache it on the
    # instance; presumably called lazily by the owner — confirm with callers.
    self.contents = get_new_dataframe(self.file_path)