def __init__(self, database, release_num, release_date):
    """Store release metadata, create a connector and run the categorizer.

    Args:
        database: Database identifier handed to
            ``dbconnect.DatabaseConnect``.
        release_num: Release number; stored and forwarded to
            ``RiskCategorizer.main``.
        release_date: Release date; stored and forwarded to
            ``RiskCategorizer.main``.
    """
    self.database, self.release_num, self.release_date = (
        database, release_num, release_date)
    self.connector = dbconnect.DatabaseConnect(database)

    # The categorizer is executed eagerly as part of construction.
    categorizer = RiskCategorizer()
    self.risk_cat = categorizer
    categorizer.main(release_num, release_date)
def __init__(self, release_date, release_num, db):
    """Record the release being processed and create a database connector.

    Args:
        release_date: Release date, stored unchanged.
        release_num: Release number, stored unchanged.
        db: Database identifier handed to ``dbconnect.DatabaseConnect``.
    """
    self.db = db
    self.release_num = release_num
    self.release_date = release_date
    # Fixed start date used by this object.
    self.start_date = '2013-01-01'
    self.connector = dbconnect.DatabaseConnect(db)
def __init__(self, database, release_num, source):
    """Collect file names and release metadata, then create a connection.

    Builds ``self.info_dict`` (data directory, one file name per table,
    load date, data source, database and release numbers), makes sure a
    ``sql_scripts`` directory exists in the current working directory and
    creates a ``dbconnect.DatabaseConnect`` for ``database``.

    Args:
        database: Database identifier; stored under ``'db'``.
        release_num: Release number; newlines and surrounding whitespace
            are removed from its string form before it is stored.
        source: Stored under ``'DataSource'``.
    """
    print('Loading data from files...')

    # Data files are expected under <cwd>/output_data, forward slashes only.
    data_dir = (os.getcwd() + '/output_data').replace('\\', '/')
    release_str = str(release_num).replace('\n', '').strip()

    self.info_dict = {
        'path': data_dir,
        'adjustment': 'adjustedclaimextractuick1.out',
        'main_claims': 'claim_finaluick1.out',
        'nips': 'servicenips_finaluick1.out',
        'pharmacy': 'pharmacy_finaluick1.out',
        'procedure': 'serviceproc_finaluick1.out',
        'recipient_flags': 'recipientflags_final_uick1.out',
        'revenue': 'servicerev_finaluick1.out',
        'compound_drug': 'servicepharmndc_finaluick1.out',
        'immunization': 'cornerstone_finaluick1.out',
        'diagnosis': 'servicediag_finaluick1.out',
        'institutional': 'serviceinst_finaluick1.out',
        'lead': 'lead_finaluick1.out',
        'ending': '\n\n',
        'load_date': '{:%Y-%m-%d}'.format(datetime.today()),
        'DataSource': source,
        'db': database,
        'ReleaseNum': release_str,
        # Same release string with its first two characters dropped.
        'Cumulative_ReleaseNum': release_str[2:],
    }
    self.load_inline_dict = {}

    if 'sql_scripts' not in os.listdir():
        os.mkdir('sql_scripts')

    self.connection = dbconnect.DatabaseConnect(database)
def __init__(self, database, release_num, file_path):
    """Set up paths, table names and a connection for a load run.

    Args:
        database: Database identifier; stored under ``'db'`` and handed to
            ``dbconnect.DatabaseConnect``.
        release_num: Release number; normalised with ``str(int(...))``.
        file_path: Base directory containing the ``Raw_Data`` and
            ``SQL_Scripts`` sub-directories.
    """
    raw_dir = os.path.join(file_path, 'Raw_Data')
    self.raw_file_path = raw_dir
    self.sql_file_path = os.path.join(file_path, 'SQL_Scripts')

    self.info_dict = {
        'path': raw_dir,
        'adjustment': 'Adjustments',
        'main_claims': 'Main_Claims',
        'nips': 'NIPS',
        'pharmacy': 'Pharmacy',
        'procedure': 'Procedure',
        'recipient_flags': 'Recipient_Flags',
        'revenue': 'Revenue_Codes',
        'compound_drug': 'Pharmacy_Prior_Authorization',
        'immunization': 'Cornerstone_Immunization',
        'diagnosis': 'Diagnosis',
        'institutional': 'Institutional',
        'lead': 'Lead',
        'ending': ';\n\n',
        'load_date': '{:%Y-%m-%d}'.format(datetime.today()),
        'db': database,
        'ReleaseNum': str(int(release_num)),
    }
    self.load_inline_dict = {}
    self.connection = dbconnect.DatabaseConnect(database)
def __init__(self, database, release_num):
    """Remember the release and the tables this object updates.

    Args:
        database: Database identifier handed to
            ``dbconnect.DatabaseConnect``.
        release_num: Release number, stored unchanged.
    """
    tables = ('nips', 'diagnosis', 'institutional', 'procedure',
              'revenue_codes', 'main_claims')
    self.update_tables = list(tables)
    self.connection = dbconnect.DatabaseConnect(database)
    self.release_num = release_num
def __init__(self, db):
    """Create a connection and load the packaged categorization CSV files.

    Args:
        db: Database identifier handed to ``dbconnect.DatabaseConnect``.
    """
    self.connection = dbconnect.DatabaseConnect(db)

    # Directory of CSV lookups shipped with this package.
    cat_dir = pkg_resources.resource_filename(__name__,
                                              'categorization_files/')

    def csv_path(file_name):
        return os.path.join(cat_dir, file_name)

    self.nips_cat_df = pd.read_csv(csv_path('nips_cat.csv'))
    self.op_hcpcs_df = pd.read_csv(csv_path('op_hcpcs_codes_cat.csv'))
    # Revenue codes are read as strings rather than inferred as numbers.
    self.op_rev_df = pd.read_csv(csv_path('op_rev_codes_cat.csv'),
                                 dtype={'RevenueCd': 'str'})
    self.check_cat_df = pd.read_csv(csv_path('check_category.csv'))
def __init__(self, release_num, release_date, database='CHECK_CPAR2',
             data_source='HFS'):
    """Store the release parameters and create a database connection.

    Args:
        release_num: Release number, stored unchanged.
        release_date: Release date, stored unchanged.
        database: Database identifier handed to
            ``dbconnect.DatabaseConnect`` (default ``'CHECK_CPAR2'``).
        data_source: Data source label (default ``'HFS'``).
    """
    self.release_num = release_num
    self.release_date = release_date
    self.data_source = data_source
    self.database = database
    self.conn = dbconnect.DatabaseConnect(database)
def __init__(self, database, release_date, release_num):
    """Set diagnosis configuration and create a database connection.

    Args:
        database: Database identifier handed to
            ``dbconnect.DatabaseConnect``.
        release_date: Accepted for signature compatibility; this
            constructor does not store it.
        release_num: Release number, stored unchanged.
    """
    # Qualifying ratio per diagnosis, i.e. 3 inclusion codes for every
    # 1 exclusion code to be diagnosed.
    self.dx_ratio = {'SCD': 0.75}
    self.connection = dbconnect.DatabaseConnect(database)
    self.release_num = release_num
    self.diagnosis_tables = list((
        'pat_info_dx_mental_health',
        'pat_info_dx_pregnancy',
        'pat_info_dx_primary',
    ))
def __init__(self, pp_n_months, release_num, db_name):
    """Configure a pre/post window calculation.

    Args:
        pp_n_months: (int) Number of months to select before and after
            the patient's program date.
        release_num: (int) Release to select from
            ``rid_pre_post_pat_windows``.
        db_name: Database identifier handed to
            ``dbconnect.DatabaseConnect``.

    Note (from the original author): currently only works for test
    CHECK_Categories.
    """
    self.query = conconnect.ConsensusConnect()
    self.pp_n_months = pp_n_months
    self.release_num = release_num
    self.db_name = db_name
    self.connection = dbconnect.DatabaseConnect(db_name)

    # One Pre and one Post column per cost bucket, in bucket order.
    buckets = ('Inpatient', 'Outpatient', 'ED', 'Other', 'Pharmacy',
               'Total')
    self.cost_columns = [
        bucket + suffix
        for bucket in buckets
        for suffix in ('_Pre', '_Post')
    ]
def __init__(self, database, release_date, release_num):
    """Store the release and map each date column to its risk column.

    Args:
        database: Database identifier handed to
            ``dbconnect.DatabaseConnect``.
        release_date: Release date, stored unchanged.
        release_num: Release number, stored unchanged.
    """
    self.release_date, self.release_num = release_date, release_num
    self.connection = dbconnect.DatabaseConnect(database)

    # Date column name -> name of the risk column associated with it.
    date_to_risk = (
        ('Release_Date', 'Current_Risk'),
        ('Engagement_Date', 'Engagement_Risk'),
        ('Enrollment_Date', 'Enrollment_Risk'),
        ('Randomization_Date', 'Randomization_Risk'),
        ('Program_Date', 'Program_Risk'),
    )
    self.risk_date_columns = dict(date_to_risk)
def __init__(self, release_num, db_name):
    """Store the release, create a connector and define column lists.

    Args:
        release_num: Release number, stored unchanged.
        db_name: Database identifier handed to
            ``dbconnect.DatabaseConnect``.
    """
    self.release_num = release_num
    self.db_name = db_name
    self.connector = dbconnect.DatabaseConnect(db_name)

    # Columns named as the primary keys by this object.
    self.primary_keys = list((
        'DCN',
        'ServiceLineNbr',
        'RejectionStatusCd',
        'RecipientID',
        'AdjudicatedDt',
    ))

    # Output column names, in order.
    self.output_cols = list((
        'RecipientID',
        'RejectionStatusCd',
        'ServiceLineNbr',
        'AdjudicatedDt',
        'DCN',
        'Category1',
        'Category2',
        'Category3',
        'Category2Rank',
        'Category3Rank',
        'NetLiabilityAmt',
        'EncounterPriceAmt',
        'Visit',
        'Service_Count',
        'Procedure_Count',
        'Encounter',
        'Visit_Inpatient_Days',
    ))
def connect(self, sql_str, db_name, df_flag=True, parse_dates=None):
    """Send ``sql_str`` to ``db_name`` and return the outcome.

    Args:
        sql_str: Query text to be sent to the database.
        db_name: Name of the database the query is sent to.
        df_flag: When truthy, the query result is returned (the original
            docstring describes it as a pandas DataFrame). When falsy, the
            statement is only executed.
        parse_dates: Forwarded to ``connector.query`` when ``df_flag`` is
            truthy; ignored otherwise.

    Returns:
        The value returned by ``connector.query`` when ``df_flag`` is
        truthy, otherwise the string ``"'<sql_str>' successfully ran"``.
    """
    connector = dbconnect.DatabaseConnect(db_name)

    # Branch on truthiness so every df_flag value produces a return value;
    # comparing against True/False left the result unbound for e.g. None.
    if df_flag:
        return connector.query(sql_str, df_flag=True,
                               parse_dates=parse_dates)

    connector.query(sql_str, df_flag=False)
    return "'{}' successfully ran".format(sql_str)
def window_load(self):
    """Compute per-patient pre/post window lengths and insert them.

    Reads patients for ``self.release_num`` from ``pat_info_complete``,
    derives the number of whole months before and after each patient's
    ``Program_Date``, and inserts the result into
    ``rid_pre_post_pat_windows``.

    Returns:
        The string ``'Window load complete'``.
    """
    connector = dbconnect.DatabaseConnect(self.database)

    # Date when first bills came in from HFS
    check_start_date = '2014-05-01'

    # Pulls patients for the release held in self.release_num
    date_columns = ['Program_Date', 'HFS_Release_Date', 'CHECK_Start_Date']
    window_sql = """SELECT RecipientID,Program_Date, pic.ReleaseNum, '{}' as CHECK_Start_Date, '{}' as HFS_Release_Date from pat_info_complete pic where pic.ReleaseNum = {} """.format(
        check_start_date, self.release_date, self.release_num)
    pt_df = connector.query(window_sql, parse_dates=date_columns)
    print('pt_df ', pt_df)

    def six_months_earlier(value):
        return pd.Timestamp(value) - pd.DateOffset(months=6)

    # Goes back 6 months from the release date so that more bills have
    # been paid (per the original author's note).
    pt_df['CHECK_End_Date'] = pt_df['HFS_Release_Date'].apply(
        six_months_earlier)

    # NOTE(review): dividing by a month-unit timedelta64 may be rejected by
    # newer pandas/numpy versions - confirm against the pinned versions.
    one_month = np.timedelta64(1, 'M')
    months_after = (pt_df['CHECK_End_Date'] - pt_df['Program_Date']) / one_month
    months_before = (pt_df['Program_Date'] - pt_df['CHECK_Start_Date']) / one_month

    # Only whole months count towards a window.
    pt_df['Positive_Duration'] = months_after.apply(math.floor)
    pt_df['Negative_Duration'] = months_before.apply(math.floor)

    insert_columns = [
        'RecipientID', 'Program_Date', 'Negative_Duration',
        'Positive_Duration', 'ReleaseNum'
    ]
    connector.insert(pt_df[insert_columns], 'rid_pre_post_pat_windows')
    return 'Window load complete'
def __init__(self, database='CHECK_CPAR2'): #qualifying ratio i,e, 3 inclusion codes for every 1 exclusion code to be diagnosed self.dx_ratio = {'SCD': .75} self.connector = dbconnect.DatabaseConnect(database) self.diagnosis_tables = [ 'pat_info_dx_mental_health', 'pat_info_dx_pregnancy', 'pat_info_dx_primary' ] self.pat_info_query = """SELECT p.RecipientID, p.Enrollment_Age, p.Gender, if(d.RecipientID is null,'0', GROUP_CONCAT(Distinct DiagCd separator ',')) ICD_List FROM pat_info_demo p left join tsc_hfs_diagnosis d on p.RecipientID = d.RecipientID group by RecipientID""" self.dx_code_query = '''SELECT RecipientID, DiagCd, count(*) ICD_Count
def main(self, release_num, release_date):
    """Load the encounter pivot plus enrollment and engagement dates.

    Args:
        release_num: Not used by the statements visible in this method.
        release_date: Converted to ``pd.Timestamp`` and stored as
            ``self.max_date``.
    """
    self.connector = dbconnect.DatabaseConnect('CHECK_CPAR2')
    self.max_date = pd.Timestamp(release_date)

    # One row per (RecipientID, ServiceFromDt), one column per Category.
    self.ip_ed_df = self.ip_ed_query()
    pivoted = pd.pivot_table(
        self.ip_ed_df,
        index=['RecipientID', 'ServiceFromDt'],
        columns='Category',
        values='encounters',
        aggfunc='first',
        fill_value=0,
    )
    self.ip_ed_df = pivoted.reset_index()

    # Falls back to Program_date when Initial_Enrollment_Date is null.
    enroll_sql = '''select RecipientID, if(Initial_Enrollment_Date is null, Program_date, Initial_Enrollment_Date) as Initial_Enrollment_Date from pat_info_demo;'''
    self.enroll_df = self.connector.query(enroll_sql)

    engage_sql = '''select RecipientID, Engagement_Date from pat_info_demo WHERE Engagement_Date is not null;'''
    self.engage_df = self.connector.query(engage_sql)
def full_run(self, recipient_list=None, to_sql=False):
    """Write patient-level rolling-window CSVs and aggregated CSVs.

    Recipients are processed in batches of 500. Each batch is written to
    ``rolling_window_output/pt_level_<offset>.csv``; afterwards one
    aggregation CSV per grouping is written to ``agg_output/``.

    Args:
        recipient_list: Recipient IDs to process. Defaults to every unique
            ``RecipientID`` returned by ``self.query.cpar_patient_info()``.
        to_sql: When truthy, ``load_files`` is called on the patient-level
            output directory after the CSVs are written.

    Returns:
        The string ``'completed'``.
    """
    demo_df = self.query.cpar_patient_info()
    pat_program_dates = demo_df[['RecipientID', 'Program_Date']].copy()
    demo_df = demo_df[[
        'RecipientID', 'Population_Type', 'Diagnosis_Category',
        'Program_Risk', 'Program_Age_Category', 'Gender'
    ]]
    demo_df = demo_df.set_index('RecipientID').rename(columns={
        'Program_Risk': 'Risk',
        'Program_Age_Category': 'Age_Category'
    })

    # NOTE(review): this connector is never used below; the construction
    # is kept in case it has side effects the run relies on - confirm and
    # remove if not.
    connector = dbconnect.DatabaseConnect('CHECK_CPAR2')

    if recipient_list is None:
        recipient_list = demo_df.index.unique()

    jumper = 500
    cat_col_list = [
        'CHECK_Category', 'Category1', 'Category2', 'Category3'
    ]
    output_columns = [
        'Population_Type', 'Diagnosis_Category', 'Risk', 'Age_Category',
        'Gender', 'Category_Type', 'Category', 'Window',
        'AdjustedPriceAmt', 'Encounter', 'ServiceCount',
        'VisitInpatientDays'
    ]

    output_path = "rolling_window_output/"
    # Ensure the patient-level output directory exists before writing;
    # previously only agg_output was created.
    os.makedirs(output_path, exist_ok=True)

    for x in range(0, len(recipient_list), jumper):
        file_name = "{}pt_level_{}.csv".format(output_path, x)
        temp_unique_rins = recipient_list[x:x + jumper]
        pt_costs_df = self.claims_query(pat_program_dates,
                                        temp_unique_rins)

        # One rolling-window frame per category column, each joined to
        # the demographics on RecipientID.
        windows = []
        for cat_col in cat_col_list:
            rolling_win = self.to_rolling_pivot(pt_costs_df, cat_col)
            rolling_win = rolling_win.set_index('RecipientID')
            rolling_win = pd.merge(rolling_win, demo_df,
                                   left_index=True, right_index=True)
            windows.append(rolling_win)

        rolling_df = pd.concat(windows)[output_columns]
        rolling_df.to_csv(file_name, chunksize=100000)
    print('Completed rolling window calculation')

    grouping_list = [
        [],
        ['Diagnosis_Category'],
        ['Risk'],
        ['Age_Category'],
        ['Diagnosis_Category', 'Risk'],
        ['Diagnosis_Category', 'Age_Category'],
        ['Population_Type', 'Diagnosis_Category'],
        ['Population_Type', 'Diagnosis_Category', 'Age_Category'],
        ['Population_Type', 'Gender'],
        ['Diagnosis_Category', 'Risk', 'Age_Category'],
        ['Population_Type', 'Risk']
    ]

    # exist_ok avoids the check-then-create race of listing the directory.
    os.makedirs('agg_output', exist_ok=True)

    df = self.aggregation_df()
    for group in grouping_list:
        print(group)
        group_output = self.window_aggregation(df, group)
        group_output.to_csv('agg_output/Aggregation_' + "_".join(group) +
                            '.csv')

    if to_sql:
        load_files(output_path)
    return 'completed'
import pandas as pd
import numpy as np
from CHECK.dbconnect import dbconnect


def mc_pivot(mc_df, columns, values, margins_name):
    """Pivot claim aggregates to one row per RecipientID with flat columns.

    Sums ``values`` per ``RecipientID`` and per entry of ``columns``,
    including a margins column named ``margins_name``, then renames the
    hierarchical columns to ``<second level>_<first level>``.

    Args:
        mc_df: DataFrame with a ``RecipientID`` column plus the columns
            named in ``columns`` and ``values``.
        columns: Column(s) whose values become pivot columns.
        values: Column(s) to sum.
        margins_name: Label used for the margins row/column.

    Returns:
        The pivoted DataFrame with ``RecipientID`` as a regular column and
        its last row removed.
    """
    pivot_mc_df = pd.pivot_table(mc_df, index=['RecipientID'],
                                 columns=columns, values=values,
                                 fill_value=0, aggfunc=np.sum,
                                 margins=True, margins_name=margins_name)
    # Flatten the two-level column labels.
    pivot_mc_df.columns = [
        col[1] + "_" + col[0] for col in pivot_mc_df.columns.values
    ]
    pivot_mc_df.reset_index(inplace=True)
    # Drop the final row, keeping only the per-recipient rows.
    pivot_mc_df = pivot_mc_df[:-1]
    return pivot_mc_df


connector = dbconnect.DatabaseConnect('CHECK_CPAR2')

release_info = connector.query(
    """Select ReleaseNum, HFS_Release_Date FROM hfs_release_info WHERE ReleaseNum = (SELECT MAX(ReleaseNum) from hfs_release_info)""")

# subtracts 1 to get bills since last release
relnum = release_info['ReleaseNum'][0] - 1
reldate = release_info['HFS_Release_Date'][0].strftime('%Y-%m-%d')

# Latest service date of the previous release: lower bound of the window.
min_window_dt = connector.query(
    '''Select max(servicefromdt) as min_ser_dt from tsc_hfs_main_claims where ReleaseNum = {};'''.format(relnum))
min_window_dt = min_window_dt['min_ser_dt'][0].strftime('%Y-%m-%d')

# The lower bound was previously referenced as the undefined name
# `min_win_dt`, which raised NameError.
mc_df = connector.query(
    '''Select RecipientID, CHECK_Category, sum(visit) as Visit, count(*) as Encounters, min(ServiceFromDt) as 'Min_ServiceFromDt', Max(ServiceFromDt) as 'Max_ServiceFromDt', sum(AdjustedPriceAmt) as AdjustedPriceAmt from tsc_hfs_main_claims_new where ServiceFromDt > '{}' and ServiceFromDt <= '{}' group by RecipientID, CHECK_Category;'''.format(min_window_dt, reldate),
    parse_dates=['Max_ServiceFromDt', 'Min_ServiceFromDt'])

high_util_df = mc_pivot(mc_df, 'CHECK_Category',
                        ['Visit', 'Encounters', 'AdjustedPriceAmt'],
                        'Release')
def calculate_ucsd_risk(self):
    """Compute raw and age/gender-adjusted UCSD risk scores per recipient.

    Loads three lookup files from the ``ucsd_files`` directory, runs the
    diagnosis query through ``self.execute_cursor`` and sums the
    ``cdps_dadc.txt`` weight of every item that ``self.report_generator()``
    yields for a recipient. The raw total is then joined to
    ``pat_info_demo`` and adjusted by an age/gender constant plus 0.226.

    Side effects:
        Sets ``self.connector``, ``self.d1``, ``self.codeSet`` and
        ``self.masterD``.
        NOTE(review): these attributes are presumably read by
        ``execute_cursor`` / ``report_generator`` - confirm.

    Returns:
        DataFrame with columns ``RecipientID``, ``UCSD_Risk_Raw`` and
        ``UCSD_Risk``. ``UCSD_Risk`` is left empty for rows matching none
        of the age/gender brackets.
    """
    self.connector = dbconnect.DatabaseConnect('CHECK_CPAR2')
    base_dir = "/home/data_upload/.ipython/CHECK/cpar"

    # Files are read inside `with` blocks so the handles are always closed.
    self.d1 = {}
    with open(base_dir + "/pat_info/ucsd_files/cdpsfmt1.child.txt",
              "r") as fmt_file:
        for line in fmt_file:
            parts = line.split("=")
            key = parts[0].strip("'\r")
            value = parts[1].strip(';\n\r')
            self.d1[key] = value.strip("'")

    # Line number -> whitespace-separated tokens of that line.
    with open(base_dir + "/pat_info/ucsd_files/codes.txt", "r") as codes_file:
        self.codeSet = {
            line_no: line.split()
            for line_no, line in enumerate(codes_file)
        }

    # Key -> weight text; converted to float when summed below.
    weights = {}
    with open(base_dir + "/pat_info/ucsd_files/cdps_dadc.txt",
              "r") as weight_file:
        for line in weight_file:
            parts = line.split("=")
            key = parts[0].strip("'\r")
            value = parts[1].strip(';\n\r')
            weights[key] = value.strip()

    self.masterD = {}
    sql2 = """SELECT REPLACE(FILE,',',"'='") UCSD_RISK FROM (SELECT CONCAT (hfsr.RecipientID,"='",hfsr.ICD_list,"'") FILE FROM CHECK_CPAR2.pat_info_dx_primary hfsr) tbl1"""
    self.execute_cursor(sql2)

    # Collect rows first and build the frame once instead of enlarging it
    # row by row. Iterating directly also handles an empty generator.
    rows = []
    for info in self.report_generator():
        if not info:
            break
        total = 0.0
        for item in info[1]:
            total = total + float(weights[item])
        # The recipient ID is the first 9 characters of the report line.
        report_line = info[0][:]
        report_line += "\t\t" + str(total)
        rows.append((report_line[0:9], total))

    risk_raw_df = pd.DataFrame(rows, columns=['RecipientID', 'Risk'])

    demo_df = self.connector.query('''select RecipientID, Gender, Age from pat_info_demo;''')
    risk_raw_df = pd.merge(risk_raw_df, demo_df, on='RecipientID',
                           how='left')

    age = risk_raw_df['Age']
    is_male = risk_raw_df['Gender'] == 'Male'
    is_female = risk_raw_df['Gender'] == 'Female'

    # (row mask, bracket adjustment); the brackets are mutually exclusive.
    adjustments = [
        (age <= 1, 0.398),
        ((age > 1) & (age < 5), -0.068),
        ((age >= 5) & (age < 15) & is_male, -0.06),
        ((age >= 5) & (age < 15) & is_female, -0.105),
        ((age >= 15) & (age < 25) & is_male, -0.026),
        ((age >= 15) & (age < 25) & is_female, 0.051),
        ((age >= 25) & is_male, -0.068),
        ((age >= 25) & is_female, 0.041),
    ]
    for mask, adjustment in adjustments:
        risk_raw_df.loc[mask, 'RiskScore'] = (
            risk_raw_df['Risk'] + adjustment + 0.226)

    risk_raw_df = risk_raw_df.drop(['Gender', 'Age'], axis=1)
    risk_raw_df = risk_raw_df.rename(columns={
        'Risk': 'UCSD_Risk_Raw',
        'RiskScore': 'UCSD_Risk'
    })
    return risk_raw_df
def __init__(self, database, release_num):
    """Create a database connection and remember the release number.

    Args:
        database: Database identifier handed to
            ``dbconnect.DatabaseConnect``.
        release_num: Release number, stored unchanged.
    """
    connection = dbconnect.DatabaseConnect(database)
    self.connection, self.release_num = connection, release_num