Example #1
0
def process_file(headers):
    """Build the field-index lookup and the group-by rules for the output.

    You may modify this function.

    Args:
        headers: Header names of the input file; handed unchanged to
            ``opf.get_header_indices`` for every lookup.

    Returns:
        A ``(field_index, groupby_dict)`` tuple. ``field_index`` maps each
        code-type name to whatever ``opf.get_header_indices`` returns for
        the matching header name. ``groupby_dict`` maps each output column
        name to the string ``'concat'`` or ``'min'`` (presumably the
        aggregation applied by the caller -- confirm against the caller).
    """
    # Indexes for each code-type used for definitions. Do not modify.
    field_index = {
        'ICD10': opf.get_header_indices(headers, 'diag_icd10'),
        'ICD9': opf.get_header_indices(headers, 'diag_icd9'),
        'ICD_YEAR': opf.get_header_indices(headers, 'icd_years'),
        'Patient_ID': opf.get_header_indices(headers, 'Patient_ID'),
    }

    # One entry per output column.
    groupby_dict = {
        'AFIB_Codes': 'concat',
        'Earliest_Afib_Diagnosis': 'min',
        'Other_Malformations': 'concat',
        'Minimum_Age_At_Other_Malformations': 'min',
    }

    return field_index, groupby_dict
Example #2
0
def process_file(headers, output_column_names):
    """Pick the Biobank fields of interest and name the output columns.

    You may modify this function.

    Args:
        headers: Header names of the input file; handed unchanged to
            ``opf.get_header_indices`` for every lookup.
        output_column_names: Collection of output column names. It is
            extended in place with the columns listed below.

    Returns:
        A ``(field_index, output_column_names)`` tuple, where
        ``output_column_names`` is the same object that was passed in.
    """
    # Must specify field numbers of fields of interest. Do not change.
    # These lookups tell the program where the codes are in the file.
    icd10_columns = (opf.get_header_indices(headers, '41202')
                     + opf.get_header_indices(headers, '41204'))
    icd9_columns = (opf.get_header_indices(headers, '41203')
                    + opf.get_header_indices(headers, '41205'))
    self_reported_columns = opf.get_header_indices(headers, '20002', '20009')
    birth_year_columns = opf.get_header_indices(headers, '34')

    field_index = {
        'ICD10': icd10_columns,
        'ICD9': icd9_columns,
        'SelfRep_MC': self_reported_columns,
        'Year_Of_Birth': birth_year_columns,
    }

    output_column_names.append('Year_Of_Birth')

    # Every single ICD10 code of interest becomes its own output column.
    output_column_names.extend(single_icd10_codes_of_interest)

    # 'Stroke' is an output column, followed by two self-reported columns:
    # which malformations the patient reported, and the minimum age at which
    # the patient was diagnosed. Patients are only assigned a malformation
    # if they meet age criteria.
    output_column_names.extend([
        'Stroke',
        'SelfReported_Malformations',
        'Minimum_Age_At_SelfReported_Malformation',
    ])

    return field_index, output_column_names
def process_file(headers, output_column_names):
    """Build the field-index lookup, output columns and group-by rules.

    You may modify this function.

    Args:
        headers: Header names of the input file; handed unchanged to
            ``opf.get_header_indices`` for every lookup.
        output_column_names: Collection of output column names. One entry
            per item in the module-level ``diseases`` is appended in place.

    Returns:
        A ``(field_index, output_column_names, groupby_dict)`` tuple.
        ``output_column_names`` is the same object that was passed in, and
        ``groupby_dict`` maps every item of ``diseases`` to ``'min'``.
    """
    # Indexes for each code-type used for definitions. The two *_YEAR
    # entries keep only the first element of the lookup result.
    field_index = {
        'ICD10': opf.get_header_indices(headers, 'diag_icd10'),
        'ICD9': opf.get_header_indices(headers, 'diag_icd9'),
        'OPCS': opf.get_header_indices(headers, 'oper4'),
        'ICD_YEAR': opf.get_header_indices(headers, 'icd_years')[0],
        'OPCS_YEAR': opf.get_header_indices(headers, 'opcs_years')[0],
        'Patient_ID': opf.get_header_indices(headers, 'Patient_ID'),
    }

    # Each disease is both an output column and a 'min' group-by entry.
    groupby_dict = {}
    for disease in diseases:
        output_column_names.append(disease)
        groupby_dict[disease] = 'min'

    return field_index, output_column_names, groupby_dict
# ----------------------------------------------

# (DO-NOT-CHANGE
# Read the header line and every remaining line of the UKB file. The `with`
# block guarantees the handle is closed even if a read raises; the names
# bound here (f, header, headers, line_list, lines) are the same as before.
with open(config.ukb_file) as f:
    header = f.readline()
    lines = f.readlines()

# Column names are tab-separated; the first one is replaced with '0_0'.
# NOTE(review): the header line is not stripped, so the last column name
# keeps its trailing newline -- confirm opf.get_header_indices tolerates it.
headers = header.split('\t')
headers[0] = '0_0'

line_list = []
# DO-NOT-CHANGE)

# Must specify field numbers of fields of interest.
# Each *_index holds whatever opf.get_header_indices returns for the given
# field number(s); where two field numbers are listed, the results are joined.
year_of_birth_index = opf.get_header_indices(headers, '34')

icd10_index = (opf.get_header_indices(headers, '41202')
               + opf.get_header_indices(headers, '41204'))
opcs_index = (opf.get_header_indices(headers, '41200')
              + opf.get_header_indices(headers, '41210'))
icd9_index = (opf.get_header_indices(headers, '41203')
              + opf.get_header_indices(headers, '41205'))
selfrep_mc_index = opf.get_header_indices(headers, '20002', '20009')
selfrep_op_index = opf.get_header_indices(headers, '20004', '20011')

# Names of columns for the output file, in output order.
fieldnames = ['Patient_ID', 'Year_Of_Birth', 'CHD', 'Confirmed']
Example #5
0
# Two per-attribute lookups built from the sheet by sh.sheet_to_dict, one from
# the 'Input' column and one from the 'Binary' column (presumably keyed by the
# 'Attribute' column -- confirm against sh.sheet_to_dict).
attrib_select = sh.sheet_to_dict(reader, s, 'Attribute', 'Input')
attrib_conversion = sh.sheet_to_dict(reader, s, 'Attribute', 'Binary', dict_values=True)

with open(config.ukb_file) as f:
    # Column names come from the first, tab-separated line of the file; the
    # first name is replaced with '0_0'.
    # NOTE(review): the header line is not stripped, so the last column name
    # keeps its trailing newline -- confirm opf.get_header_indices tolerates it.
    header = f.readline()
    headers = header.split('\t')
    headers[0] = '0_0'

    # Maps an output column name to the header indices that feed it.
    attrib_index = {}

    # We find the indices of the attributes we are interested in.
    # `attrib` is defined outside this view; attrib[a] is what gets passed to
    # opf.get_header_indices for attribute a.
    for a in attrib:

        if attrib_select[a] in ('F', 'A'): # There will only be one column associated with this attribute

            # All matching indices are stored under the attribute's own name.
            attrib_index[a] = opf.get_header_indices(headers, attrib[a])

        else: # Output all values into separate columns
            all_indices = opf.get_header_indices(headers, attrib[a])

            if len(all_indices) > 1:
                # Several matches: create one numbered column per index, each
                # inheriting the selection and conversion entries of the
                # original attribute. The original name `a` itself gets no
                # attrib_index entry in this branch.
                column_num = 0
                for index in all_indices:
                    new_column = a + '_' + str(column_num) # Format: Attribute_0, Attribute_1, etc.
                    attrib_index[new_column] = [index]
                    attrib_select[new_column] = attrib_select[a]
                    attrib_conversion[new_column] = attrib_conversion[a]
                    column_num += 1
            else:
                # Zero or one match: keep the attribute name as the column name.
                attrib_index[a] = all_indices