def generate_row_info(in_files, join_key):
    """Build a mapping from join-key value to merged row data.

    Reads each CSV file named in ``in_files``, keeps only the configured
    columns, and merges rows from all files that share the same value in
    the ``join_key`` column.

    Args:
        in_files: dict mapping file name -> per-file config dict.  Each
            config holds either 'columns_to_keep' or
            'multiple_columns_to_keep', and may hold 'skip_first_line'.
        join_key: header name whose value identifies a row across files.

    Returns:
        dict mapping join-key value -> {header: cell value}.
    """
    row_info = {}
    for file_name in in_files.keys():
        with open(resolve(file_name), mode='r', encoding='utf-8-sig') as file:
            reader = csv.reader(file)
            headers = next(reader, None)
            headers = [x.strip(' ') for x in headers]
            if 'columns_to_keep' in in_files[file_name]:
                headers_to_keep = in_files[file_name]['columns_to_keep']
                # NOTE(review): 'skip_first_line' is honoured only when
                # 'columns_to_keep' is present; a sibling version of this
                # function checks it unconditionally — confirm which is
                # intended.
                if in_files[file_name]['skip_first_line']:
                    next(reader, None)
            else:
                headers_to_keep = in_files[file_name][
                    'multiple_columns_to_keep']

            row_indices = get_row_indices(headers, headers_to_keep)
            # The join column's position is fixed per file; look it up once
            # instead of scanning the header list on every data row.
            join_key_col_index = headers.index(join_key)

            for row in reader:
                current_row_info = get_current_row(row_info,
                                                   row[join_key_col_index])

                for index in row_indices:
                    current_row_info[headers[index]] = row[index]

                row_info[row[join_key_col_index]] = current_row_info

    return row_info
# Example #2
def generate_row_info(in_files, join_key):
    """Build a mapping from join-key value to merged row data.

    Like the sibling version above, but also captures an optional
    metadata line describing the headers.

    Args:
        in_files: dict mapping file name -> per-file config dict with
            'skip_first_line', 'first_line_metadata', and either
            'columns_to_keep' or 'multiple_columns_to_keep'.
        join_key: header name whose value identifies a row across files.

    Returns:
        Tuple of (row_info, header_meta_dict) where row_info maps
        join-key value -> {header: cell value} and header_meta_dict maps
        header -> metadata description (from the last file that had one).
    """
    row_info = {}
    first_line_metadata = "first_line_metadata"
    header_meta_dict = dict()
    for file_name in in_files.keys():
        with open(resolve(file_name), mode="r", encoding="utf-8-sig") as file:
            reader = csv.reader(file)
            headers = next(reader, None)
            headers = [x.strip(" ") for x in headers]

            if in_files[file_name]["skip_first_line"]:
                next(reader, None)

            if in_files[file_name][first_line_metadata]:
                # NOTE(review): this mixes csv.reader iteration with a raw
                # file.readline(); it works in CPython because csv pulls
                # lines on demand, but it is fragile — confirm.
                header_meta_desc = file.readline()
                # Strip the trailing newline so the last metadata entry is
                # not polluted with "\n" (the original kept it).
                header_meta_desc = header_meta_desc.rstrip("\n").split(",")

                header_meta_dict = dict(zip(headers, header_meta_desc))

            if "columns_to_keep" in in_files[file_name]:
                headers_to_keep = in_files[file_name]["columns_to_keep"]
            else:
                headers_to_keep = in_files[file_name][
                    "multiple_columns_to_keep"]

            row_indices = get_row_indices(headers, headers_to_keep)
            # Loop-invariant: the join column position does not change
            # between rows of the same file.
            join_key_col_index = headers.index(join_key)

            for row in reader:
                current_row_info = get_current_row(row_info,
                                                   row[join_key_col_index])

                for index in row_indices:
                    current_row_info[headers[index]] = row[index]

                row_info[row[join_key_col_index]] = current_row_info

    return row_info, header_meta_dict
# Example #3
from datetime import timedelta
import petl as etl
from fhir_petl.fhir import to_json
from fhir_petl.util import resolve, mkdirp, number, join, year, dateparser, ISOFormat

def map_race(race):
    """Translate a raw RACE string into a FHIR v3 Race coding tuple.

    Returns a ``(system, code, display)`` tuple for recognised values,
    or ``None`` when the race string is unknown.
    """
    system = 'http://hl7.org/fhir/v3/Race'
    codings = {
        'AMERICAN INDIAN AND ALASKA NATIVE':
            (system, '1002-5', 'American Indian or Alaska Native'),
        'ASIAN': (system, '2028-9', 'Asian'),
        'BLACK OR AFRICAN AMERICAN':
            (system, '2054-5', 'Black or African American'),
        # NOTE(review): source data folds Hispanic/Latino into the White
        # race code — confirm this is the intended mapping.
        'HISPANIC OR LATINO': (system, '2106-3', 'White'),
        'WHITE': (system, '2106-3', 'White'),
        'NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER':
            (system, '2076-8', 'Native Hawaiian or Other Pacific Islander'),
    }
    return codings.get(race)

# Load the staged patient table and map its raw columns onto the field
# names consumed by the FHIR conversion step.
patients = (etl.io.csv.fromcsv(resolve('work/Patient.csv'))
            .fieldmap({
                'id': 'ID',
                'STUDYID': 'STUDYID',
                # Prefix with the cohort so subject IDs cannot collide
                # with IDs from other cohorts (controls use 'CONTROL-').
                'subject_id': ('STUDYID', lambda x: 'CASE-' + x),
                'race': ('RACE', map_race),
                'gender': ('SEX', {'F': 'female', 'M': 'male'}),
                'birth_date': ('BIRTH_YR', year),
                # Only the year is available; the parser pins it to
                # day-level ISO precision — presumably Jan 1st; confirm.
                'index_date': ('INDEX_YEAR', dateparser('%Y', ISOFormat.DAY)),
                'tag': lambda rec: ('subject-type', 'case')
            }, True))

# Narrow projection used to join index dates onto other resources;
# 'id' is renamed to 'subject' to match the joining convention.
index = (patients
         .cut('STUDYID', 'id', 'index_date')
         .rename('id', 'subject'))
import petl as etl
from fhir_petl.util import preprocess, resolve, mkdirp
import random

# Ensure the staging directory exists before any CSV output is written.
mkdirp(resolve('work'))

# Randomly sample 1000 case study IDs; every extract below is restricted
# to this sample.
study_ids = etl.io.csv.fromtsv(resolve('sa1cases.txt')).columns()['STUDYID']
selection = set(random.sample(study_ids, 1000))

# Demographics -> Patient staging table.
demographics = etl.io.csv.fromcsv(resolve('case_demog_27065.csv'))
preprocess(demographics.selectin('STUDYID', selection),
           'STUDYID').tocsv(resolve('work/Patient.csv'))

# Diagnoses -> Condition staging table.
diagnoses = etl.io.csv.fromtsv(resolve('dx_case_inst.txt'))
preprocess(diagnoses.selectin('STUDYID', selection),
           'STUDYID').tocsv(resolve('work/Condition.csv'))

# Lab results -> Observation staging table.
labs = etl.io.csv.fromtsv(resolve('lab_case_inst.txt'))
preprocess(labs.selectin('STUDYID', selection),
           'STUDYID').tocsv(resolve('work/Observation.csv'))

# Medications -> MedicationDispense staging table (keyed by CASE_ID).
medications = etl.io.csv.fromtsv(resolve('med_case_inst_gpi.txt'))
preprocess(medications.selectin('CASE_ID', selection),
           'CASE_ID').tocsv(resolve('work/MedicationDispense.csv'))

preprocess(
    etl.io.csv.fromtsv(resolve('order_case_inst_gpi.txt')).selectin(
# Example #5
        ('http://hl7.org/fhir/v3/Race', '2106-3', 'White'),
        'WHITE': ('http://hl7.org/fhir/v3/Race', '2106-3', 'White'),
        'NATIVE HAWAIIAN AND OTHER PACIFIC ISLANDER':
        ('http://hl7.org/fhir/v3/Race', '2076-8',
         'Native Hawaiian or Other Pacific Islander')
    }.get(race, None)


def index_date(rec):
    """Derive an ISO-formatted index date for a record.

    The index year is BIRTH_YR + INDEX_AGE; it is parsed with
    day-level ISO precision so downstream consumers get a full date.
    """
    index_year = number(rec['BIRTH_YR']) + number(rec['INDEX_AGE'])
    return dateparser('%Y', ISOFormat.DAY)(str(index_year))


# Map the staged control-cohort patient CSV onto the FHIR field names.
patients = (etl.io.csv.fromcsv(resolve('work/Patient.csv')).fieldmap(
    {
        'id': 'ID',
        'CONTROL_ID': 'control_id',
        # Namespace subject IDs so controls cannot collide with cases.
        'subject_id': ('control_id', lambda x: 'CONTROL-' + x),
        'race': ('RACE', map_race),
        'gender': ('SEX', {
            'F': 'female',
            'M': 'male'
        }),
        'birth_date': ('BIRTH_YR', year),
        # Computed as BIRTH_YR + INDEX_AGE by the index_date helper.
        'index_date': index_date,
        'tag': lambda rec: ('subject-type', 'control')
    }, True))

index = (patients.cut('CONTROL_ID', 'id',
# Example #6
    elif month and text and not day:
        parser = dateparser("%m/%Y", ISOFormat.MONTH)
        date_string = "{0}/{1}".format(month, text)

    elif not complete_date_bool:
        parser = dateparser("%Y", ISOFormat.YEAR)
        date_string = "{0}".format(text)
    else:
        parser = dateparser("%m/%d/%Y", ISOFormat.DAY)
        date_string = "{0}".format(text)

    return parser(date_string)


# Map the updated patient CSV onto FHIR field names.  Several mappings
# are left commented out — reason not visible from here; confirm before
# re-enabling.
patients = etl.io.csv.fromcsv(resolve("work/Patient_ktb_updated.csv")).fieldmap(
    {
        "id": "ID",
        "subject_id": "SID",
        "SID": "SID",
        "race": ("RACE", map_race),
        "ethnicity": ("ETHNICITY", map_ethnicity),
        "marital_status": ("MARITAL_STATUS", map_marital_status),
        # "gender": ("GENDER", {"F": "female", "M": "male"}),
        # "birth_date": ("BIRTH_DATE", parse_date),
        # 'death_date': ('DEATH_YR', year),
        # 'sample_date': ('SAMPLE_DATE', sample_date),
        # 'tag': ('Cohort', lambda cohort: ('cohort', cohort.upper()))
    },
    True,
)
# Example #7
import petl as etl
from fhir_petl.util import resolve

# Load each staged FHIR work table produced by the preprocessing step.
observations = etl.io.csv.fromcsv(resolve('work/Observation.csv'))
patients = etl.io.csv.fromcsv(resolve('work/Patient.csv'))
procedures = etl.io.csv.fromcsv(resolve('work/Procedure.csv'))
conditions = etl.io.csv.fromcsv(resolve('work/Condition.csv'))
requests = etl.io.csv.fromcsv(resolve('work/MedicationRequest.csv'))
dispenses = etl.io.csv.fromcsv(resolve('work/MedicationDispense.csv'))

# Spot-check the first rows of the medication-request table.
print(requests.look(10))
# Example #8
import petl as etl
from fhir_petl.util import preprocess, resolve, mkdirp

# Create the staging directory for preprocessed CSV output.
mkdirp(resolve('work'))

# (raw extract, staged output) pairs: each source file is normalised by
# preprocess() and written under work/ for the conversion step.
conversions = [
    ('Table_1_Demographics_New_Cohorts.csv', 'work/Patient.csv'),
    ('Diagnoses.csv', 'work/Condition.csv'),
    ('fairbanks_cv.dedup.csv', 'work/Observation.csv'),
    ('Prescriptions.csv', 'work/MedicationRequest.csv'),
    ('Procedures.csv', 'work/Procedure.csv'),
]
for source, target in conversions:
    preprocess(etl.io.csv.fromcsv(resolve(source))).tocsv(resolve(target))
import petl as etl
from fhir_petl.util import preprocess, resolve, mkdirp
import random

# Staging directory for the control-cohort extracts.
mkdirp(resolve('work'))

# Sample 1000 control IDs; every extract below is restricted to them.
control_ids = etl.io.csv.fromtsv(
    resolve('sa1controls.txt')).columns()['control_id']
selection = set(random.sample(control_ids, 1000))

# (raw extract, join-key column, staged output).  Note the key column is
# lower-case in the demographics file but upper-case elsewhere.
extracts = [
    ('controls.txt', 'control_id', 'work/Patient.csv'),
    ('dx_control_inst.txt', 'CONTROL_ID', 'work/Condition.csv'),
    ('lab_control_inst.txt', 'CONTROL_ID', 'work/Observation.csv'),
    ('med_control_inst_gpi.txt', 'CONTROL_ID', 'work/MedicationDispense.csv'),
]
for source, key_col, target in extracts:
    table = etl.io.csv.fromtsv(resolve(source)).selectin(key_col, selection)
    preprocess(table, key_col).tocsv(resolve(target))