def process_file(headers):
    """Build the header-index lookup and the group-by aggregation map.

    You may modify this function.

    Args:
        headers: Header collection, forwarded unchanged to
            ``opf.get_header_indices``.

    Returns:
        A ``(field_index, groupby_dict)`` tuple. ``field_index`` maps each
        code type to whatever ``opf.get_header_indices`` returns for the
        matching header name. ``groupby_dict`` maps each output column name
        to the aggregation keyword assigned below ('concat' or 'min').
    """
    # Indexes for each code-type used for definitions. Do not modify.
    field_index = {
        'ICD10': opf.get_header_indices(headers, 'diag_icd10'),
        'ICD9': opf.get_header_indices(headers, 'diag_icd9'),
        'ICD_YEAR': opf.get_header_indices(headers, 'icd_years'),
        'Patient_ID': opf.get_header_indices(headers, 'Patient_ID'),
    }

    # Aggregation keyword per output column.
    groupby_dict = {
        'AFIB_Codes': 'concat',
        'Earliest_Afib_Diagnosis': 'min',
        'Other_Malformations': 'concat',
        'Minimum_Age_At_Other_Malformations': 'min',
    }

    return field_index, groupby_dict
def process_file(headers, output_column_names):
    """Pick the Biobank fields of interest and name the output columns.

    You may modify this function.

    Args:
        headers: Header collection, forwarded unchanged to
            ``opf.get_header_indices``.
        output_column_names: List of output column names. It is extended
            in place with the columns added below.

    Returns:
        A ``(field_index, output_column_names)`` tuple, where the second
        element is the same list object that was passed in.
    """
    # Field numbers of the fields of interest. Do not change.
    # This tells the program where the codes are in the file.
    field_index = {
        'ICD10': (opf.get_header_indices(headers, '41202')
                  + opf.get_header_indices(headers, '41204')),
        'ICD9': (opf.get_header_indices(headers, '41203')
                 + opf.get_header_indices(headers, '41205')),
        'SelfRep_MC': opf.get_header_indices(headers, '20002', '20009'),
        'Year_Of_Birth': opf.get_header_indices(headers, '34'),
    }

    output_column_names.append('Year_Of_Birth')

    # Each single ICD10 code of interest becomes its own output column.
    output_column_names.extend(single_icd10_codes_of_interest)

    # 'Stroke' is an output column. The two self-reported columns hold the
    # patient's self-reported malformations and the minimum age at
    # diagnosis; patients are only assigned a malformation if they meet
    # the age criteria.
    output_column_names.extend([
        'Stroke',
        'SelfReported_Malformations',
        'Minimum_Age_At_SelfReported_Malformation',
    ])

    return field_index, output_column_names
def process_file(headers, output_column_names):
    """Build the header-index lookup, output columns and group-by map.

    You may modify this function.

    Args:
        headers: Header collection, forwarded unchanged to
            ``opf.get_header_indices``.
        output_column_names: List of output column names. One entry per
            item in ``diseases`` is appended in place.

    Returns:
        A ``(field_index, output_column_names, groupby_dict)`` tuple.
        ``output_column_names`` is the same list object that was passed
        in, and ``groupby_dict`` maps every item in ``diseases`` to 'min'.
    """
    # Indexes for each code-type used for definitions. The two *_YEAR
    # entries keep only the first element of the lookup result.
    field_index = {
        'ICD10': opf.get_header_indices(headers, 'diag_icd10'),
        'ICD9': opf.get_header_indices(headers, 'diag_icd9'),
        'OPCS': opf.get_header_indices(headers, 'oper4'),
        'ICD_YEAR': opf.get_header_indices(headers, 'icd_years')[0],
        'OPCS_YEAR': opf.get_header_indices(headers, 'opcs_years')[0],
        'Patient_ID': opf.get_header_indices(headers, 'Patient_ID'),
    }

    # Every disease is both an output column and a 'min'-aggregated field.
    groupby_dict = {}
    for disease in diseases:
        output_column_names.append(disease)
        groupby_dict[disease] = 'min'

    return field_index, output_column_names, groupby_dict
# ----------------------------------------------
# (DO-NOT-CHANGE
# Read the tab-separated header line, then load every remaining line of
# the file into memory. The first header entry is overwritten with '0_0'.
f = open(config.ukb_file)
header = f.readline()
headers = header.split('\t')
headers[0] = '0_0'
line_list = []
lines = f.readlines()
f.close()
# DO-NOT-CHANGE)

# Must Specify Field Numbers of Fields of Interest
year_of_birth_index = opf.get_header_indices(headers, '34')
icd10_index = (opf.get_header_indices(headers, '41202')
               + opf.get_header_indices(headers, '41204'))
opcs_index = (opf.get_header_indices(headers, '41200')
              + opf.get_header_indices(headers, '41210'))
icd9_index = (opf.get_header_indices(headers, '41203')
              + opf.get_header_indices(headers, '41205'))
selfrep_mc_index = opf.get_header_indices(headers, '20002', '20009')
selfrep_op_index = opf.get_header_indices(headers, '20004', '20011')

# Names of Columns for Output File
fieldnames = ['Patient_ID', 'Year_Of_Birth', 'CHD', 'Confirmed']
# Per-attribute settings taken from the 'Input' and 'Binary' columns,
# keyed by the 'Attribute' column.
attrib_select = sh.sheet_to_dict(reader, s, 'Attribute', 'Input')
attrib_conversion = sh.sheet_to_dict(
    reader, s, 'Attribute', 'Binary', dict_values=True)

with open(config.ukb_file) as f:
    # Only the tab-separated header line is needed here. The first header
    # entry is overwritten with '0_0'.
    header = f.readline()
    headers = header.split('\t')
    headers[0] = '0_0'

    # Find the indices of the attributes we are interested in.
    attrib_index = {}
    for a in attrib:
        all_indices = opf.get_header_indices(headers, attrib[a])

        # 'F' and 'A' attributes are expected to have only one associated
        # column; any other attribute that does not match more than one
        # column is likewise stored under its own name.
        if attrib_select[a] in ('F', 'A') or len(all_indices) <= 1:
            attrib_index[a] = all_indices
            continue

        # Output all values into separate columns.
        # Format: Attribute_0, Attribute_1, etc.
        for column_num, index in enumerate(all_indices):
            new_column = a + '_' + str(column_num)
            attrib_index[new_column] = [index]
            attrib_select[new_column] = attrib_select[a]
            attrib_conversion[new_column] = attrib_conversion[a]