예제 #1
0
 def test_make_template(self):
     t_map = KeyTemplate(self.key_cols)
     stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0)
     t_map.update(stern_df)
     df1 = t_map.make_template()
     self.assertIsInstance(df1, pd.DataFrame,
                           "make_template should return a DataFrame")
     self.assertEqual(
         len(df1.columns), 1,
         "make_template should return 1 column single key, no additional columns"
     )
     t_map2 = KeyTemplate(['event_type', 'type'])
     t_map2.update(self.stern_map_path)
     df2 = t_map2.make_template()
     self.assertIsInstance(df2, pd.DataFrame,
                           "make_template should return a DataFrame")
     self.assertEqual(
         len(df2.columns), 2,
         "make_template should return 2 columns w 2 keys, no additional columns"
     )
     df3 = t_map2.make_template(['bananas', 'pears', 'apples'])
     self.assertIsInstance(df3, pd.DataFrame,
                           "make_template should return a DataFrame")
     self.assertEqual(
         len(df3.columns), 5,
         "make_template should return 5 columns w 2 keys, 3 additional columns"
     )
예제 #2
0
 def test_update_map_missing_key(self):
     keys = self.key_cols + ['another']
     t_map = KeyTemplate(keys)
     stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0)
     t_map.update(stern_df)
     self.assertEqual(len(t_map.col_map.columns),
                      len(self.key_cols) + 1,
                      "update should have all of the columns")
예제 #3
0
 def test_print(self):
     from io import StringIO
     t_map = KeyTemplate(self.key_cols + self.target_cols)
     t_map.update(self.stern_map_path)
     t_map.update(self.stern_map_path)
     with mock.patch('sys.stdout', new=StringIO()):
         t_map.print()
         print("This should be eaten by the StringIO")
예제 #4
0
 def test_update_map_duplicate_keys(self):
     t_map = KeyTemplate(self.key_cols)
     stern_df = pd.read_csv(self.stern_test2_path, delimiter='\t', header=0)
     t_map.update(stern_df)
     self.assertEqual(
         len(t_map.count_dict), len(t_map.map_dict),
         "The count dictionary and key dictionary should have same number of values"
     )
예제 #5
0
 def test_update_map(self):
     t_map = KeyTemplate(self.key_cols)
     stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0)
     t_map.update(stern_df)
     df_map = t_map.col_map
     df_dict = t_map.map_dict
     self.assertEqual(len(df_map), len(stern_df),
                      "update map should contain all the entries")
     self.assertEqual(len(df_dict.keys()), len(stern_df),
                      "update dictionary should contain all the entries")
예제 #6
0
 def test_make_template_key_overlap(self):
     t_map = KeyTemplate(['event_type', 'type'])
     t_map.update(self.stern_map_path)
     try:
         t_map.make_template(['Bananas', 'type', 'Pears'])
     except HedFileError:
         pass
     except Exception as ex:
         self.fail(
             f'make_template threw the wrong exception {ex} when additional columns overlapped keys'
         )
     else:
         self.fail(
             'KeyTemplate should have thrown a HedFileError exception when key overlap but threw none'
         )
예제 #7
0
    def test_key_hash_use(self):
        t_map = KeyTemplate(['type'])
        stern_df = pd.read_csv(self.stern_map_path,
                               delimiter='\t',
                               header=0,
                               keep_default_na=False,
                               na_values=",null")
        t_map.update(stern_df)
        t_col = t_map.col_map
        for index, row in stern_df.iterrows():
            key = get_row_hash(row, t_map.columns)
            key_value = t_map.map_dict[key]
            self.assertEqual(t_col.iloc[key_value]['type'], row['type'],
                             "The key should be looked up for same map")

        stern_test1 = pd.read_csv(self.stern_test1_path,
                                  delimiter='\t',
                                  header=0)
        for index, row in stern_test1.iterrows():
            key = get_row_hash(row, t_map.columns)
            key_value = t_map.map_dict[key]
            self.assertEqual(t_col.iloc[key_value]['type'], row['type'],
                             "The key should be looked up for other file")
예제 #8
0
 def test_update_map_not_unique(self):
     t_map = KeyTemplate(self.target_cols)
     stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0)
     t_map.update(stern_df)
     self.assertEqual(len(t_map.col_map.columns), 3,
                      "update should produce correct number of columns")
     self.assertEqual(len(t_map.col_map),
                      len(stern_df) - 1,
                      "update should produce the correct number of rows")
     for key, value in t_map.count_dict.items():
         self.assertGreaterEqual(
             value, 1, "update the counts should all be one for unique map")
     t_map.update(stern_df)
     for key, value in t_map.count_dict.items():
         self.assertGreaterEqual(
             value, 2,
             "update the counts should all be one for second update with same map"
         )
     self.assertEqual(len(t_map.col_map.columns), 3,
                      "update should produce correct number of columns")
     self.assertEqual(len(t_map.col_map),
                      len(stern_df) - 1,
                      "update should produce the correct number of rows")