Example #1
0
    def update(self, data):
        """ Merge the rows of data into the existing map.

        The incoming frame is reduced to this map's columns: key columns are
        copied over, target columns that are present are copied over, and
        target columns that are absent are filled with 'n/a'.

        Args:
            data (DataFrame or str):     DataFrame or filename of an events file or event map

        Returns:
            The value returned by self._update for the assembled frame
            (originally documented as a list of indices of duplicates).

        Raises:
            HedFileError:  If data lacks any of the key columns.
        """
        incoming = get_new_dataframe(data)
        remove_quotes(incoming)
        incoming_cols = incoming.columns.values.tolist()

        # The key columns are mandatory; only the missing ones matter here.
        _, absent_keys = separate_columns(incoming_cols, self.key_cols)
        if absent_keys:
            # NOTE(review): the message names "make_template" although this is
            # update(); kept verbatim in case callers match on the text.
            raise HedFileError(
                "MissingKeyColumn",
                f"make_template data does not have key columns {str(absent_keys)}",
                "")

        staged = pd.DataFrame(columns=self.columns)
        staged[self.key_cols] = incoming[self.key_cols].values

        found_targets, absent_targets = separate_columns(incoming_cols,
                                                         self.target_cols)
        if found_targets:
            staged[found_targets] = incoming[found_targets].values
        if absent_targets:
            staged[absent_targets] = 'n/a'
        return self._update(staged)
 def test_delete_columns(self):
     """ delete_columns should shrink the stern map from 4 columns to 2, working in place. """
     frame = get_new_dataframe(self.stern_map_path)
     self.assertEqual(len(frame.columns), 4,
                      "stern_map should have 4 columns before deletion")

     # Five names are requested but only two columns are expected to go, so
     # presumably the remaining names are not in the frame and are ignored.
     to_remove = ['banana', 'event_type', 'letter', 'apple', 'orange']
     delete_columns(frame, to_remove)
     self.assertEqual(len(frame.columns), 2,
                      "stern_map should have 2 columns after deletion")
Example #3
0
 def test_get_columns_info(self):
     """ get_columns_info should return a dict holding one key per dataframe column. """
     frame = get_new_dataframe(self.stern_test2_path)
     info = get_columns_info(frame)
     self.assertIsInstance(info, dict,
                           "get_columns_info should return a dictionary")
     key_count = len(info.keys())
     column_count = len(frame.columns)
     self.assertEqual(
         key_count, column_count,
         "get_columns_info should return a dictionary with a key for each column"
     )
 def test_get_new_dataframe(self):
     """ get_new_dataframe should load an 87 x 4 frame and return an independent frame on each call. """
     df_new = get_new_dataframe(self.stern_map_path)
     self.assertIsInstance(df_new, DataFrame)
     self.assertEqual(
         len(df_new), 87,
         "get_new_dataframe should return correct number of rows")
     self.assertEqual(
         len(df_new.columns), 4,
         "get_new_dataframe should return correct number of columns")

     # A second load of the same file must give the same shape.
     df_new1 = get_new_dataframe(self.stern_map_path)
     self.assertIsInstance(df_new1, DataFrame)
     self.assertEqual(
         len(df_new1), 87,
         "get_new_dataframe should return correct number of rows")
     self.assertEqual(
         len(df_new1.columns), 4,
         "get_new_dataframe should return correct number of columns")

     # Assign through a single indexer. The chained form
     # df_new.iloc[0]['type'] = ... writes to a temporary row object and is
     # not guaranteed to modify df_new (it never does under Copy-on-Write).
     df_new.iloc[0, df_new.columns.get_loc('type')] = 'Pear'
     self.assertNotEqual(df_new.iloc[0]['type'], df_new1.iloc[0]['type'],
                         "get_new_dataframe returns a new dataframe")
Example #5
0
def get_key_counts(root_dir, skip_cols=None):
    """ Accumulate column value counts over the events files found for root_dir.

    Files are located with get_file_list using the "_events" name suffix and
    the ".tsv" extension; each one is loaded and every retained column is
    passed to update_dict_counts.

    Args:
        root_dir (str):            Root directory handed to get_file_list.
        skip_cols (list or None):  Column names to leave out; None or empty skips nothing.

    Returns:
        dict:  The dictionary filled in by update_dict_counts across all files.
    """
    file_list = get_file_list(root_dir,
                              name_suffix="_events",
                              extensions=[".tsv"])
    count_dicts = {}
    for file in file_list:
        dataframe = get_new_dataframe(file)
        # DataFrame.iteritems() was removed in pandas 2.0; items() yields the
        # same (column name, Series) pairs on old and new versions alike.
        for col_name, col_values in dataframe.items():
            if skip_cols and col_name in skip_cols:
                continue
            update_dict_counts(count_dicts, col_name, col_values)
    return count_dicts
Example #6
0
 def test_print(self):
     """ ColumnDict should treat every column as categorical by default, and print() should run with stdout patched. """
     from io import StringIO
     column_dict = ColumnDict()
     column_dict.update(self.stern_map_path)
     frame = get_new_dataframe(self.stern_map_path)
     # The same file is fed in a second time, exactly as in the original test.
     column_dict.update(self.stern_map_path)

     categorical_count = len(column_dict.categorical_info.keys())
     column_count = len(frame.columns)
     self.assertEqual(
         categorical_count, column_count,
         "ColumnDict should have all columns as categorical if no value or skip are given"
     )

     # Swap stdout for an in-memory buffer so nothing reaches the console.
     sink = StringIO()
     with mock.patch('sys.stdout', new=sink):
         column_dict.print()
         print("This should be eaten by the StringIO")
Example #7
0
 def test_update_dict_counts(self):
     """ update_dict_counts should create the column entry and keep its size at 551 after a repeat call. """
     relative_path = 'sub-002/eeg/sub-002_task-FacePerception_run-1_events.tsv'
     events_path = os.path.abspath(os.path.join(self.bids_dir, relative_path))
     events = get_new_dataframe(events_path)
     onsets = events["onset"]

     counts = {}
     update_dict_counts(counts, "onset", onsets)
     self.assertTrue("onset" in counts,
                     "update_dict_counts updates a column counts")
     self.assertEqual(len(counts["onset"]), 551,
                      "update_dict_counts has the right number of counts")

     # Feeding the same column again must not change the number of entries.
     update_dict_counts(counts, "onset", onsets)
     self.assertEqual(len(counts["onset"]), 551,
                      "update_dict_counts has the right number of counts")
Example #8
0
    def update(self, data):
        """ Fold a key map, given as a dataframe or file, into the existing map.

        The incoming data is reshaped to this map's columns: columns that are
        present are copied and columns that are absent are filled with 'n/a'.
        The reshaped frame is then passed to self._update.

        Args:
            data (str or DataFrame):        File name or DataFrame containing event-type data.

        """
        incoming = get_new_dataframe(data)
        remove_quotes(incoming)
        found, absent = separate_columns(incoming.columns.values.tolist(),
                                         self.columns)

        staged = pd.DataFrame(columns=self.columns)
        staged[found] = incoming[found].values
        staged[absent] = 'n/a'
        self._update(staged)
Example #9
0
 def test_get_columns_info_skip_columns(self):
     """ get_columns_info should leave out skipped columns, down to an empty dict when all are skipped. """
     frame = get_new_dataframe(self.stern_test2_path)

     # Skip a single column: one fewer key than there are columns.
     one_skipped = get_columns_info(frame, ['latency'])
     self.assertIsInstance(one_skipped, dict,
                           "get_columns_info should return a dictionary")
     expected_keys = len(frame.columns) - 1
     self.assertEqual(
         len(one_skipped.keys()),
         expected_keys,
         "get_columns_info should return a dictionary with a key for each column included"
     )

     # Skip every column: the result should be an empty dictionary.
     every_column = list(frame.columns.values)
     all_skipped = get_columns_info(frame, every_column)
     self.assertIsInstance(all_skipped, dict,
                           "get_columns_info should return a dictionary")
     self.assertFalse(
         all_skipped,
         "get_columns_info should return a dictionary with a key for each column included"
     )
Example #10
0
    def update(self, data):
        """ Fold the columns of data into the running column summaries.

        Columns named in self.skip_cols are ignored. For a column already
        present in self.value_info, the stored number is increased by the
        number of rows in the column. Every other column is converted to
        strings and its unique-value counts are handed to
        self._update_categorical.

        Args:
            data (DataFrame or str):    The DataFrame to be analyzed or the full path of a tsv file.

        Returns:
            None:  The summaries held on this object are updated in place.
        """
        df = get_new_dataframe(data)
        # DataFrame.iteritems() was removed in pandas 2.0; items() yields the
        # same (column name, Series) pairs on old and new versions alike.
        for col_name, col_values in df.items():
            if self.skip_cols and col_name in self.skip_cols:
                continue
            if col_name in self.value_info:
                self.value_info[col_name] = self.value_info[col_name] + len(col_values)
            else:
                # Count on the string form of the values, as the original did.
                counts = col_values.astype(str).value_counts(ascending=True)
                self._update_categorical(col_name, counts)
Example #11
0
def make_combined_dicts(file_dict, skip_cols=None):
    """ Build one ColumnDict per file plus a ColumnDict combining all of them.

    Args:
        file_dict (dict):  Dictionary of file name keys and full path
        skip_cols (list):  Columns to skip; forwarded to every ColumnDict created here.

    Returns:
        tuple:  (combined ColumnDict, dict mapping each key of file_dict to that file's ColumnDict)
    """
    combined = ColumnDict(skip_cols=skip_cols)
    per_file = {}
    for name, path in file_dict.items():
        summary = ColumnDict(skip_cols=skip_cols)
        summary.update(get_new_dataframe(path))
        per_file[name] = summary
        # Merge this file's summary into the running combined summary.
        combined.update_dict(summary)
    return combined, per_file
Example #12
0
 def test_reorder_columns(self):
     """ reorder_columns should return a new frame with only the requested columns and leave the input untouched. """
     df = get_new_dataframe(self.stern_map_path)

     # Two existing columns requested: the result keeps all rows, two columns.
     df_new = reorder_columns(df, ['event_type', 'type'])
     self.assertEqual(
         len(df_new), 87,
         "reorder_columns should return correct number of rows")
     self.assertEqual(
         len(df_new.columns), 2,
         "reorder_columns should return correct number of columns")

     # The source frame must keep its original shape.
     self.assertEqual(
         len(df), 87,
         "reorder_columns should return correct number of rows")
     self.assertEqual(
         len(df.columns), 4,
         "reorder_columns should return correct number of columns")

     # A requested name that is not a column ('baloney') is expected to be
     # dropped from the result rather than added.
     df_new1 = reorder_columns(df, ['event_type', 'type', 'baloney'])
     self.assertEqual(
         len(df_new1), 87,
         "reorder_columns should return correct number of rows")
     self.assertEqual(
         len(df_new1.columns), 2,
         "reorder_columns should return correct number of columns")
Example #13
0
    def remap(self, data):
        """ Remap the columns of a dataframe or file using this map.

        The data is loaded into a new frame, the target columns are set to
        'n/a', and self._remap is then applied to that frame.

        Args:
            data (DataFrame, str) :        Data whose columns are to be remapped

        Returns:
            DataFrame                      New dataframe with columns remapped
            list                           List of row numbers that had no correspondence in the mapping

        Raises:
            HedFileError:                  If data lacks any of the key columns
        """
        df_new = get_new_dataframe(data)
        remove_quotes(df_new)

        # Only the missing key columns matter for validation.
        column_names = df_new.columns.values.tolist()
        _, absent_keys = separate_columns(column_names, self.key_cols)
        if absent_keys:
            raise HedFileError(
                "MissingKeys",
                f"File must have key columns {str(self.key_cols)}", "")

        # Start every target column at 'n/a' before applying the mapping.
        df_new[self.target_cols] = 'n/a'
        unmatched = self._remap(df_new)
        return df_new, unmatched
Example #14
0
 def test_remove_quotes(self):
     """ After remove_quotes, the first stimulus of stern_test2 should equal that of stern_test3. """
     cleaned = get_new_dataframe(self.stern_test2_path)
     remove_quotes(cleaned)
     reference = get_new_dataframe(self.stern_test3_path)
     cleaned_value = cleaned.loc[0, 'stimulus']
     reference_value = reference.loc[0, 'stimulus']
     self.assertEqual(cleaned_value, reference_value,
                      "remove_quotes should have quotes removed")
 def test_unflatten_hed_from_file(self):
     """ Smoke test: unflatten_hed should run on the flattened Sternberg file (nothing is asserted). """
     here = os.path.dirname(os.path.abspath(__file__))
     flattened_path = os.path.join(
         here, "../data/sternberg/sternberg_flattened.tsv")
     flattened = get_new_dataframe(flattened_path)
     sidecar_map = SidecarMap()
     sidecar_map.unflatten_hed(flattened)
Example #16
0
 def set_contents(self):
     """ Load the data at self.file_path with get_new_dataframe and store it in self.contents. """
     loaded = get_new_dataframe(self.file_path)
     self.contents = loaded