def test_update_map_missing_key(self): keys = self.key_cols + ['another'] t_map = KeyTemplate(keys) stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0) t_map.update(stern_df) self.assertEqual(len(t_map.col_map.columns), len(self.key_cols) + 1, "update should have all of the columns")
def test_update_map_duplicate_keys(self): t_map = KeyTemplate(self.key_cols) stern_df = pd.read_csv(self.stern_test2_path, delimiter='\t', header=0) t_map.update(stern_df) self.assertEqual( len(t_map.count_dict), len(t_map.map_dict), "The count dictionary and key dictionary should have same number of values" )
def test_update_map(self): t_map = KeyTemplate(self.key_cols) stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0) t_map.update(stern_df) df_map = t_map.col_map df_dict = t_map.map_dict self.assertEqual(len(df_map), len(stern_df), "update map should contain all the entries") self.assertEqual(len(df_dict.keys()), len(stern_df), "update dictionary should contain all the entries")
def test_update_key(self): t_map = KeyTemplate(self.target_cols) has_duplicate = t_map.update_by_tuple(('apple', 'banana', 'pear')) df_map = t_map.col_map df_dict = t_map.map_dict self.assertEqual(len(df_map), 1, "update map should contain all the entries") self.assertEqual(len(df_map.columns), 3, "update_by_tuple should have all of the keys") self.assertEqual(len(df_dict.keys()), 1, "update dictionary should contain all columns") self.assertFalse( has_duplicate, "update should not have any duplicates for stern map")
def test_print(self): from io import StringIO t_map = KeyTemplate(self.key_cols + self.target_cols) t_map.update(self.stern_map_path) t_map.update(self.stern_map_path) with mock.patch('sys.stdout', new=StringIO()): t_map.print() print("This should be eaten by the StringIO")
def test_key_hash_use(self): t_map = KeyTemplate(['type']) stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null") t_map.update(stern_df) t_col = t_map.col_map for index, row in stern_df.iterrows(): key = get_row_hash(row, t_map.columns) key_value = t_map.map_dict[key] self.assertEqual(t_col.iloc[key_value]['type'], row['type'], "The key should be looked up for same map") stern_test1 = pd.read_csv(self.stern_test1_path, delimiter='\t', header=0) for index, row in stern_test1.iterrows(): key = get_row_hash(row, t_map.columns) key_value = t_map.map_dict[key] self.assertEqual(t_col.iloc[key_value]['type'], row['type'], "The key should be looked up for other file")
def test_constructor(self): t_map = KeyTemplate(self.key_cols) self.assertIsInstance(t_map, KeyTemplate) df = t_map.col_map self.assertIsInstance(df, pd.DataFrame) try: KeyTemplate([]) except HedFileError: pass except Exception as ex: self.fail( f'KeyTemplate threw the wrong exception {ex} when no columns') else: self.fail( 'KeyTemplate should have thrown a HedFileError exception when no columns' ) emap1 = KeyTemplate(['a', 'b', 'c'], name='baloney') self.assertIsInstance(emap1, KeyTemplate, "KeyTemplate: multiple columns are okay") self.assertEqual( len(emap1.col_map.columns), 3, "The column map should have correct number of columns")
def test_make_template_key_overlap(self): t_map = KeyTemplate(['event_type', 'type']) t_map.update(self.stern_map_path) try: t_map.make_template(['Bananas', 'type', 'Pears']) except HedFileError: pass except Exception as ex: self.fail( f'make_template threw the wrong exception {ex} when additional columns overlapped keys' ) else: self.fail( 'KeyTemplate should have thrown a HedFileError exception when key overlap but threw none' )
def test_update_map_not_unique(self): t_map = KeyTemplate(self.target_cols) stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0) t_map.update(stern_df) self.assertEqual(len(t_map.col_map.columns), 3, "update should produce correct number of columns") self.assertEqual(len(t_map.col_map), len(stern_df) - 1, "update should produce the correct number of rows") for key, value in t_map.count_dict.items(): self.assertGreaterEqual( value, 1, "update the counts should all be one for unique map") t_map.update(stern_df) for key, value in t_map.count_dict.items(): self.assertGreaterEqual( value, 2, "update the counts should all be one for second update with same map" ) self.assertEqual(len(t_map.col_map.columns), 3, "update should produce correct number of columns") self.assertEqual(len(t_map.col_map), len(stern_df) - 1, "update should produce the correct number of rows")
def test_make_template(self): t_map = KeyTemplate(self.key_cols) stern_df = pd.read_csv(self.stern_map_path, delimiter='\t', header=0) t_map.update(stern_df) df1 = t_map.make_template() self.assertIsInstance(df1, pd.DataFrame, "make_template should return a DataFrame") self.assertEqual( len(df1.columns), 1, "make_template should return 1 column single key, no additional columns" ) t_map2 = KeyTemplate(['event_type', 'type']) t_map2.update(self.stern_map_path) df2 = t_map2.make_template() self.assertIsInstance(df2, pd.DataFrame, "make_template should return a DataFrame") self.assertEqual( len(df2.columns), 2, "make_template should return 2 columns w 2 keys, no additional columns" ) df3 = t_map2.make_template(['bananas', 'pears', 'apples']) self.assertIsInstance(df3, pd.DataFrame, "make_template should return a DataFrame") self.assertEqual( len(df3.columns), 5, "make_template should return 5 columns w 2 keys, 3 additional columns" )