def test_read_table(self):
    """Check that ``read_table`` loads 'admissions' and re-writes its pickle.

    The first half reads the table and inspects the returned object: its
    type, one known cell, and one datetime difference. The second half
    overwrites the pickle file with an empty DataFrame, calls
    ``read_table('admissions', True)`` and verifies that the file on disk
    contains the real table again.
    """
    pre = PrepareData('test', '../../data/demo_data/all')
    admissions = pre.read_table('admissions')

    # Check data type of table (subclasses of DataFrame are accepted too).
    self.assertIsInstance(admissions, pd.DataFrame,
                          'Read table is not of type Pandas.DataFrame.')

    # Check if there is data in the table.
    self.assertEqual(admissions['DIAGNOSIS'][2], 'SEPSIS',
                     'Data not read correctly')

    # Check that the time columns were parsed as datetimes: their
    # difference must compare equal to a Timedelta.
    self.assertEqual(
        (admissions['DISCHTIME'] - admissions['ADMITTIME'])[3],
        pd.Timedelta('8 days 01:23:00'),
        'Timedates not correctly marked in table.')

    # Test pickling: first plant an empty table in the pickle file, so a
    # stale pickle from an earlier run cannot satisfy the final assertion.
    file_name_pickle = '../../data/demo_data/all/pickle/admissions.p'
    with open(file_name_pickle, 'wb') as pickle_file:
        # The handle is closed (and flushed) before read_table touches
        # the same path.
        pickle.dump(pd.DataFrame(), pickle_file)

    # Second argument True: use the save function of read_table.
    pre.read_table('admissions', True)

    # NOTE: pickle.load is only acceptable here because the file was
    # produced by this very test run.
    with open(file_name_pickle, 'rb') as pickle_file:
        loaded_table = pickle.load(pickle_file)

    self.assertEqual(loaded_table['DIAGNOSIS'][2], 'SEPSIS',
                     'Data is not correctly pickled.')
def test_get_table_names(self):
    """Check the first table name returned by ``get_table_names``.

    With the argument ``True`` the first entry is expected to be
    ``'admissions'``; with ``False`` it is expected to be ``'ADMISSIONS'``.
    """
    failure_message = 'Does not return tables names correctly.'
    pre = PrepareData('test', '../../data/demo_data/all')

    # Flag set: first name expected in lower case.
    names_flag_on = pre.get_table_names(True)
    self.assertEqual(names_flag_on[0], 'admissions', failure_message)

    # Flag cleared: first name expected in upper case.
    names_flag_off = pre.get_table_names(False)
    self.assertEqual(names_flag_off[0], 'ADMISSIONS', failure_message)
def test_read_tables(self):
    """Check that ``read_tables`` loads several tables in one call.

    After the call each requested table is read back as an attribute of
    the ``PrepareData`` instance and one known cell per table is compared.
    """
    failure_message = 'Not read multiple tables correctly.'
    pre = PrepareData('test', '../../data/demo_data/all')
    pre.read_tables(['admissions', 'patients'])

    # Check if the two tables are loaded.
    diagnosis = pre.admissions['DIAGNOSIS'][2]
    self.assertEqual(diagnosis, 'SEPSIS', failure_message)

    subject_id = pre.patients['SUBJECT_ID'][3]
    self.assertEqual(subject_id, 10017, failure_message)
def test__get_drug_feature(self):
    """Check that ``_get_drug_feature`` returns no duplicate entries.

    The encoder is fitted on the prescriptions table, then the length of
    the returned collection is compared with the length of its set.
    """
    pre = PrepareData('test', '../../data/demo_data/all')
    pre.read_table('prescriptions')

    encoder = DrugEncoderV2(48, None)
    encoder.fit(pre.prescriptions)
    drug_features = encoder._get_drug_feature(pre.prescriptions)

    # Equal lengths mean that de-duplicating removed nothing.
    total_count = len(drug_features)
    distinct_count = len(set(drug_features))
    self.assertEqual(
        total_count,
        distinct_count,
        'Error in DrugEncocder._get_drug_feature(); list elements are not unique;'
    )
def test_load_table_pickle(self):
    """Check that a table stored by one instance can be loaded by another.

    One ``PrepareData`` instance reads 'admissions' with the second
    argument ``True``; a second, separate instance then calls
    ``load_table_pickle`` and its ``admissions`` attribute is inspected.
    """
    data_dir = '../../data/demo_data/all'

    # Writer side.
    writer = PrepareData('test1', data_dir)
    writer.read_table('admissions', True)

    # Reader side: a fresh instance that never called read_table itself.
    reader = PrepareData('test2', data_dir)
    reader.load_table_pickle('admissions')

    diagnosis = reader.admissions['DIAGNOSIS'][2]
    self.assertEqual(diagnosis, 'SEPSIS',
                     'Not loaded table from pickle file.')
def test_load_table_pickle_list(self):
    """Check that several stored tables can be loaded by another instance.

    One ``PrepareData`` instance calls ``read_tables`` with the second
    argument ``True``; a second instance then calls
    ``load_table_pickle_list`` with the same list, and one known cell per
    table is compared.
    """
    data_dir = '../../data/demo_data/all'
    failure_message = 'Not loaded multipe tables from pickle files.'
    table_list = ['admissions', 'patients']

    # Writer side.
    writer = PrepareData('test1', data_dir)
    writer.read_tables(table_list, True)

    # Reader side: a fresh instance that only loads from pickle.
    reader = PrepareData('test2', data_dir)
    reader.load_table_pickle_list(table_list)

    diagnosis = reader.admissions['DIAGNOSIS'][2]
    self.assertEqual(diagnosis, 'SEPSIS', failure_message)

    subject_id = reader.patients['SUBJECT_ID'][3]
    self.assertEqual(subject_id, 10017, failure_message)
def test_transform(self):
    """Check the outputs of ``DrugEncoderV2.transform`` on the demo data.

    Four things are verified after fitting on prescriptions and
    transforming prescriptions plus admissions:

    * row 7 of the returned matrix equals a known 0/1 pattern,
    * the matrix has one row per admissions row and 10 columns,
    * the id mapping has one entry per unique ``HADM_ID``,
    * ``drug_mapping`` equals a known 10-entry dictionary.

    The 10 matches the second constructor argument (presumably the cap on
    the number of drug features -- confirm against DrugEncoderV2).
    """
    pre = PrepareData('test', '../../data/demo_data/all')
    pre.read_tables(['prescriptions', 'admissions'])

    drug_enc = DrugEncoderV2(48, 10)
    drug_enc.fit(pre.prescriptions)
    hadm_drug_matrix, hadm_id_mapping = drug_enc.transform(
        pre.prescriptions, pre.admissions)

    # Every column of row 7 must match. Summing the element-wise
    # comparison would already be truthy with a single matching column,
    # so the comparison is reduced with .all() instead.
    expected_row = [0, 1, 1, 1, 0, 1, 1, 1, 1, 0]
    row_matches = hadm_drug_matrix[7, :] == expected_row
    self.assertTrue(
        row_matches.all(),
        'Error in DrugEncoder.transform(); hadm_drug_matrix wrong')

    self.assertEqual(
        hadm_drug_matrix.shape, (len(pre.admissions), 10),
        'Error in DrugEncoder.transform(); hadm_drug_matrix has wrong shape;'
    )

    self.assertEqual(len(hadm_id_mapping),
                     pre.admissions['HADM_ID'].unique().shape[0],
                     'Error in DrugEncoder.transform(); wrong shape;')

    self.assertEqual(
        drug_enc.drug_mapping, {
            'FURO40I': 0,
            'NS500': 1,
            'NS1000': 2,
            'NACLFLUSH': 3,
            'INSULIN': 4,
            'D5W250': 5,
            'VANC1F': 6,
            'VANCOBASE': 7,
            'HEPA5I': 8,
            'KCL20PM': 9
        }, 'Error in DrugEncoder.transform(); drug_mapping is wrong')
def test_fit(self):
    """Check the ``drug_mapping`` built by ``DrugEncoderV2.fit``.

    A default-constructed encoder is expected to map 'NS500' to 1 and
    'MORPH100' to 797. An encoder built with ``(48, 13)`` is expected to
    end up with exactly 13 mapping entries.
    """
    data_dir = '../../data/demo_data/all'
    mapping_message = 'Drug mapping not correct.'

    # Default encoder parameters.
    pre = PrepareData('test', data_dir)
    pre.read_table('prescriptions')
    default_encoder = DrugEncoderV2()
    default_encoder.fit(pre.prescriptions)

    self.assertEqual(default_encoder.drug_mapping['NS500'], 1,
                     mapping_message)
    self.assertEqual(default_encoder.drug_mapping['MORPH100'], 797,
                     mapping_message)

    # Test encoder parameters on a freshly loaded table.
    pre = PrepareData('test', data_dir)
    pre.read_table('prescriptions')
    capped_encoder = DrugEncoderV2(48, 13)
    capped_encoder.fit(pre.prescriptions)

    mapping_size = len(capped_encoder.drug_mapping)
    self.assertEqual(mapping_size, 13,
                     'max_number_of_drug_features does not work.')