Esempio n. 1
0
# Command-line interface: three input paths and one output path, all mandatory.
parser = argparse.ArgumentParser(
    description='A filelist of input / output files, one filename per row no quote marks'
)
parser.add_argument('-B', help='full path to the Bib', required=True)
parser.add_argument('-C', help='full path to the Con', required=True)
parser.add_argument('-E', help='full path to the Enc', required=True)
parser.add_argument('-O', help='full path to the output file', required=True)

# Parse once, then expose every path under its own module-level name.
args = parser.parse_args()
fileBib, fileCon, fileEnc, fileout = args.B, args.C, args.E, args.O

# Build one RPDR_query per input file, in Bib / Con / Enc order.
bib, con, enc = (
    rp.RPDR_query(name=label, filein=path)
    for label, path in (('Bib', fileBib), ('Con', fileCon), ('Enc', fileEnc))
)

# Convert each query with lst_2_pd(); the result is used as a pandas DataFrame
# below (presumably what lst_2_pd returns -- confirm in RPDR_parsing).
bibdf, condf, encdf = bib.lst_2_pd(), con.lst_2_pd(), enc.lst_2_pd()

# Parse the Admit_Date column into datetimes with a single vectorised call.
encdf['last_visit_date'] = pd.to_datetime(encdf['Admit_Date'])
# Alternative if the data file may contain missing dates (element-wise, so it
# may be slow on big files):
# encdf['last_visit_date'] = encdf['Admit_Date'].apply(lambda x: pd.to_datetime(x) if(pd.notnull(x)) else x)
# Alternative if every date shares one known layout (an explicit format may be
# faster):
# encdf['last_visit_date'] = pd.to_datetime(encdf.Admit_Date, format = '%d/%m/%y')
# NOTE(review): '%d/%m/%y' is day/month/two-digit-year, not ISO 8601
# ('%Y-%m-%d') -- confirm the real Admit_Date layout before enabling it.

encdf = encdf.sort_values(['EMPI',
                           'last_visit_date']).drop_duplicates(subset='EMPI',
Esempio n. 2
0
#/data/dgag/projects/GELS/data/RPDR_filelist/filelist_Bib
# Command-line interface: Bib, Con and Dem input paths plus one output path,
# all mandatory.
parser = argparse.ArgumentParser(
    description='A filelist of input / output files, one filename per row no quote marks'
)
parser.add_argument('-B', help='full path to the Bib', required=True)
parser.add_argument('-C', help='full path to the Con', required=True)
parser.add_argument('-D', help='full path to the Dem', required=True)
parser.add_argument('-O', help='full path to the output file', required=True)

# Parse once, then expose every path under its own module-level name.
args = parser.parse_args()
fileBib, fileCon, fileDem, fileout = args.B, args.C, args.D, args.O

# Build one RPDR_query per input file, in Bib / Con / Dem order.
bib, con, dem = (
    rp.RPDR_query(name=label, filein=path)
    for label, path in (('Bib', fileBib), ('Con', fileCon), ('Dem', fileDem))
)

# Convert each query with lst_2_pd() (presumably to a pandas DataFrame --
# confirm in RPDR_parsing).
bibdf, condf, demdf = bib.lst_2_pd(), con.lst_2_pd(), dem.lst_2_pd()

cols = {
    'Bib': ['Subject_Id', 'EMPI', 'MGH_MRN'],
    'Con': ['EMPI', 'Insurance_1', 'Insurance_2', 'Insurance_3'],
    'Dem': [
        'EMPI', 'Gender', 'Date_of_Birth', 'Language', 'Race',
        'Marital_status', 'Religion', 'Is_a_veteran', 'Vital_status',
        'Date_Of_Death'
    ]
Esempio n. 3
0
# This script can be used to extract the test item information from Phy and Lab files, as a potential reference for keyword search.
# Dia and Med files could also be used, to output Diagnosis information (non-code) and Medication_Date_Detail.
import RPDR_parsing as rp
import argparse

# Command-line interface: the RPDR filelist, the output path and the RPDR file
# name are all mandatory.
parser = argparse.ArgumentParser(description='A filelist of RPDR filelist, one filename per row no quote marks')
parser.add_argument('-F', required=True, help='full path to the filelist.txt')
parser.add_argument('-O', required=True, help='full path to the output item data file')
parser.add_argument('-N', required=True, help='Name of the RPDR file')
args = parser.parse_args()
filein = args.F
fileout = args.O
name = args.N

query = rp.RPDR_query(name=name, filein=filein)
outdat, header = query.read_data()

# Distinct values found at index 5 of every row returned by read_data().
# NOTE(review): index 5 is presumably the item-name column -- confirm against
# the header of the RPDR file being processed.
item = {line[5] for line in outdat}

# Write one item per line. The items are sorted so that repeated runs produce
# an identical file (set iteration order is arbitrary). The `with` block closes
# the file on exit, so no explicit close() is needed.
with open(fileout, 'w') as f:
    f.writelines(i + '\n' for i in sorted(item))

Esempio n. 4
0
                    required=True,
                    help='full path to the IM clinics json file')
# Two output files: -O1 receives every matched EMPI, -O2 only the EMPIs that
# were matched more than once. The help texts used to be identical, which made
# the two options indistinguishable in --help.
parser.add_argument('-O1',
                    required=True,
                    help='full path to the IM EMPI output file')
parser.add_argument('-O2',
                    required=True,
                    help='full path to the output file for IM EMPIs '
                         'matched more than once')

# Parse once, then expose every option under its own module-level name.
args = parser.parse_args()
fileEnc = args.F
itemlist = args.I
fileout1 = args.O1
fileout2 = args.O2

# Load the Enc rows that match the IM clinics term list.
enc = rp.RPDR_query(name="Enc", filein=fileEnc)
m = rp.matchterm(name="IM_clinics", filein=itemlist)
df = rp.read_matched(rpdrobj=enc, matchtermobj=m)

# Number of matched rows per EMPI, as a two-column frame (EMPI, count).
empi_counts = df.groupby(df['EMPI']).size().reset_index(name="count")

# imdf: every EMPI with at least one match; im2df: EMPIs with more than one.
imdf = empi_counts[['EMPI']]
im2df = empi_counts.loc[empi_counts['count'] > 1, ['EMPI']]

# Plain EMPI lists: no index column and no header row.
imdf.to_csv(fileout1, index=False, header=False)
im2df.to_csv(fileout2, index=False, header=False)
Esempio n. 5
0
    'A filelist of input / output files, one filename per row no quote marks')
# Mandatory options: three input paths, the clinics list name and the output
# path.
parser.add_argument('-B', help='full path to the Bib', required=True)
parser.add_argument('-C', help='full path to the Con', required=True)
parser.add_argument('-E', help='full path to the Enc', required=True)
parser.add_argument('-L', help='Name of the clinics list', required=True)
parser.add_argument('-O', help='full path to the output file', required=True)

# Parse once, then expose every option under its own module-level name.
args = parser.parse_args()
fileBib, fileCon, fileEnc = args.B, args.C, args.E
fileout, itemlist = args.O, args.L

# Build the three query objects. The right-hand side is evaluated in full
# before any name is rebound.
# NOTE(review): `bib` and `enc` are rebound here from whatever they named
# before (presumably imported modules) to query objects -- confirm nothing
# later in the script still needs the originals.
bib, con, enc = (
    bib.RPDR_query_Bib(name='Bib', filein=fileBib),
    rp.RPDR_query(name='Con', filein=fileCon),
    enc.RPDR_query_Enc(name='Enc', filein=fileEnc, itemlist=itemlist),
)

# Convert each query with lst_2_pd(); encdf is used as a pandas DataFrame below.
bibdf, condf, encdf = bib.lst_2_pd(), con.lst_2_pd(), enc.lst_2_pd()

# Parse Admit_Date value by value, passing missing entries through unchanged.
encdf['last_visit_date'] = encdf['Admit_Date'].apply(
    lambda x: pd.to_datetime(x) if (pd.notnull(x)) else x)
# Vectorised alternative:
# encdf['last_visit_date'] = pd.to_datetime(encdf.Admit_Date)

# Keep one row per EMPI: the one with the latest last_visit_date.
# sort_values() and drop_duplicates() both return new DataFrames, so the result
# must be assigned back; previously it was discarded and encdf kept every row.
# Missing dates are sorted first so that keep='last' never prefers a row with
# no date over a row with a real one.
encdf = encdf.sort_values(
    ['EMPI', 'last_visit_date'],
    na_position='first',
).drop_duplicates(subset='EMPI', keep='last')

cols = {
    'Bib': ['EMPI'],
Esempio n. 6
0
# Mandatory options: the id list plus one output file per measurement.
parser.add_argument('-S', help='full path to the idlist json file', required=True)
parser.add_argument('-O1', help='full path to the BMI output file', required=True)
parser.add_argument('-O2', help='full path to the A1C output file', required=True)
parser.add_argument('-O3', help='full path to the Glucose output file', required=True)

# Parse once, then expose the input paths, output paths and id list under their
# own module-level names.
args = parser.parse_args()
fileBib, filePhy, fileLab = args.B, args.P, args.L
fileout1, fileout2, fileout3 = args.O1, args.O2, args.O3
idlist = args.S

# One RPDR_query per input file, in Bib / Phy / Lab order.
bib, phy, lab = (
    rp.RPDR_query(name=label, filein=path)
    for label, path in (('Bib', fileBib), ('Phy', filePhy), ('Lab', fileLab))
)

# Match-term objects, created in the same order as before: the id list first,
# then the Phy terms, then the Lab terms.
mbib, mbmi_phy, ma1c_phy, mglu_phy, ma1c_lab, mglu_lab = (
    rp.matchterm(name="IM", filein=idlist),
    rp.matchterm(name='BMI_Phy', filein=args.PI1),
    rp.matchterm(name='A1c_Phy', filein=args.PI2),
    rp.matchterm(name='Glu_Phy', filein=args.PI3),
    rp.matchterm(name='A1c_Lab', filein=args.LI1),
    rp.matchterm(name='Glu_Lab', filein=args.LI2),
)

# Fixed date window handed to read_matched as sdate / edate.
start_date, end_date = '1/1/2018', '12/31/2018'

# Bib rows matching the id list (no date restriction).
bibdf = rp.read_matched(rpdrobj=bib, matchtermobj=mbib)
# Phy rows matching the BMI terms, restricted on the "Date" variable.
bmi_phy_df = rp.read_matched(
    rpdrobj=phy,
    matchtermobj=mbmi_phy,
    timevar="Date",
    sdate=start_date,
    edate=end_date,
)
Esempio n. 7
0
import RPDR_parsing as rp
import pandas as pd
import timeit

# Hard-coded location of the Phy filelist.
filein = '/Volumes/LaCie/PBB/Phy_filelist'

# Read the rows and the header through RPDR_query.read_data().
phy = rp.RPDR_query(name='Phy', filein=filein)
phydat, phyheader = phy.read_data()

# Distinct values found at index 5 of every row.
item = {row[5] for row in phydat}

# Optional: load the rows into a DataFrame instead.
# phydf = pd.DataFrame(phydat, columns=phyheader)
Esempio n. 8
0
# Column names listed for each RPDR file type, keyed by the file's short name.
cols = {
    'Bib': [
        'EMPI',
        'Subject_Id',
    ],
    'Lab': [
        'EMPI',
        'Date',
        'Group_Id',
        'Result',
        'Units',
        'Reference_Range',
        'DataSource',
    ],
    'Phy': [
        'EMPI',
        'Date',
        'Concept_Name',
        'Result',
        'Units',
        'Clinic',
        'Inpatient_Outpatient',
        'DataSource',
    ],
    'Med': [
        'EMPI',
        'Medication_Date',
        'Medication',
    ],
    'Dia': [
        'EMPI',
        'Date',
        'Diagnosis_Name',
        'Code_Type',
        'Code',
    ],
}

# One RPDR_query per input file, in Bib / Phy / Lab / Med / Dia order.
bib, phy, lab, med, dia = (
    rp.RPDR_query(name="Bib", filein=fileBib),
    rp.RPDR_query(name="Phy", filein=filePhy),
    rp.RPDR_query(name="Lab", filein=fileLab),
    rp.RPDR_query(name="Med", filein=fileMed),
    rp.RPDR_query(name="Dia", filein=fileDia),
)

# Match-term objects, each loaded from the file named by its command-line
# option.
mbib, ma1c_phy, ma1c_lab, mmed1, mmed2, mdia1, mdia2 = (
    rp.matchterm(name="IM", filein=args.BI),
    rp.matchterm(name="A1c_Phy", filein=args.PI),
    rp.matchterm(name="A1c_Lab", filein=args.LI),
    rp.matchterm(name="T1DRx", filein=args.MI1),
    rp.matchterm(name="T2DRx", filein=args.MI2),
    rp.matchterm(name="T1DDx", filein=args.DI1),
    rp.matchterm(name="T2DDx", filein=args.DI2),
)

# Restrict Bib to the EMPIs of subjects with IM visits (decision-tree cohort).
bibdf = rp.read_matched(rpdrobj=bib, matchtermobj=mbib)
Esempio n. 9
0
                    help='full path to the renal failure diagnosis list file')
# Remaining mandatory options: the end-stage renal failure diagnosis list and
# the output path.
parser.add_argument('-DC2',
                    help='full path to the end stage renal failure diagnosis list file',
                    required=True)
parser.add_argument('-O', help='full path to the output file', required=True)

# Parse once, then expose every input / output path under its own module-level
# name.
args = parser.parse_args()
fileDem, filePhy, fileLab = args.Dem, args.P, args.L
fileMed, fileDia, fileOut = args.M, args.D, args.O

# One RPDR_query per input file, in Dem / Phy / Lab / Med / Dia order.
dem, phy, lab, med, dia = (
    rp.RPDR_query(name="Dem", filein=fileDem),
    rp.RPDR_query(name='Phy', filein=filePhy),
    rp.RPDR_query(name='Lab', filein=fileLab),
    rp.RPDR_query(name='Med', filein=fileMed),
    rp.RPDR_query(name='Dia', filein=fileDia),
)

# Match-term objects, created in the original order, each loaded from the file
# named by its command-line option.
# NOTE(review): the last name reads "ma1bphy" (digit 1) while its siblings
# follow the "crePhy" / "gfrPhy" pattern -- confirm it is intended before
# changing it, since the name may be used as a lookup key.
(mid, mwtphy, mbmiphy, ma1cphy, ma1clab,
 mcrephy, mcrelab, mgfrphy, mgfrlab, malbphy) = (
    rp.matchterm(name="IDGRP", filein=args.ID),
    rp.matchterm(name="Weight", filein=args.PI1),
    rp.matchterm(name="BMI", filein=args.PI2),
    rp.matchterm(name="A1cPhy", filein=args.PI3),
    rp.matchterm(name="A1cLab", filein=args.LI1),
    rp.matchterm(name="crePhy", filein=args.PI4),
    rp.matchterm(name="creLab", filein=args.LI2),
    rp.matchterm(name="gfrPhy", filein=args.PI5),
    rp.matchterm(name="gfrLab", filein=args.LI3),
    rp.matchterm(name="ma1bphy", filein=args.PI6),
)