def delAllFalseSUDusers(logger): makeNewQry = ''' CREATE TABLE tejas.sud_users_new ( siteid text, backgroundid text, alc bool, cannabis bool, amphe bool, halluc bool, nicotin bool, cocaine bool, opioids bool, sedate bool, others bool, polysub bool, inhalant bool, morethan2sud bool ) ''' value = pgIO.commitData(makeNewQry) if value == True: print("tejas.sud_users_new table has been created") deleteDupliQuery = ''' INSERT INTO tejas.sud_users_new (siteid, backgroundid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant) SELECT siteid, backgroundid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant FROM tejas.sud_users GROUP BY siteid, backgroundid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant ''' value = pgIO.commitData(deleteDupliQuery) if value == True: print("Duplicate values succesfully deleted") return
def addmorethan2sudcolumn(logger): '''Populates the 'morethan2sud' column in tejas.sud_race_age This function counts the number of 'True' for each mental disorder for each user in tejas.sud_race_age. If they have more than 1 'True' value, their 'morethan2sud' column will be set to 'True'. Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: query = ''' SELECT siteid, backgroundid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant FROM tejas.sud_race_age ''' data = pgIO.getAllData(query) csvfile = '../data/raw_data/atleast2suduser_keys.csv' count = 0 output = open(csvfile, 'w+') csv_output = csv.writer(output) for row in data: if sum(list(row[2:])) >= 2: csv_output.writerow(row) readCSV = csv.reader(open(csvfile), delimiter=",") for user in tqdm(readCSV): updateQuery = SQL(''' UPDATE tejas.sud_race_age SET morethan2sud = true WHERE siteid = {} AND backgroundid = {} ''').format(Literal(user[0]), Literal(str(user[1]))) value = pgIO.commitData(updateQuery) # print(type(user[0])) #Update column's null values to false updateQuery2 = ''' UPDATE tejas.sud_race_age SET morethan2sud = false WHERE morethan2sud is null ''' print(pgIO.commitData(updateQuery2)) except Exception as e: logger.error('adding morethan2sud column to the database failed because of {}'.format(e)) return
def relabelVar(logger): '''Relabels column values This function relabels Race and Settings values to standardised values. Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: # relabel_sex_success = [] # for sex in table1_config["inputs"]["sexes"]: # sex_query = SQL(''' # UPDATE sarah.test2 # SET sex = {} # WHERE sex in {} # ''').format( # Literal(sex), # Literal(tuple(table1_config["params"]["sexes"][sex])) # ) # relabel_sex_success.append(pgIO.commitData(sex_query)) # if False in relabel_sex_success: # print("Relabelling sex not successful!") relabel_race_success = [] for race in table1_config["inputs"]["races"]: race_query = SQL(''' UPDATE sarah.test2 SET race = {} WHERE race in {} ''').format(Literal(race), Literal(tuple(table1_config["params"]["races"][race]))) relabel_race_success.append(pgIO.commitData(race_query)) if False in relabel_race_success: print("Relabelling race not successful!") relabel_setting_success = [] for setting in table1_config["inputs"]["settings"]: setting_query = SQL(''' UPDATE sarah.test2 SET visit_type = {} WHERE visit_type in {} ''').format( Literal(setting), Literal(tuple(table1_config["params"]["settings"][setting]))) relabel_setting_success.append(pgIO.commitData(setting_query)) if False in relabel_setting_success: print("Relabelling setting not successful!") except Exception as e: logger.error('Failed to update table test2 because {}'.format(e))
def createTable(logger, schemaName, tableName, createTableQueryString, existsTableQueryString=''): if checkTableExistence(schemaName, tableName): if existsTableQueryString != '': pgIO.commitData(existsTableQueryString, dbName=dbName) return False else: pgIO.commitData(createTableQueryString, dbName=dbName) return True
def createJoined(logger): makeNew = ''' CREATE TABLE tejas.sud_race_age ( siteid text, backgroundid text, age text, sex text, visit_type text, race text, alc bool, cannabis bool, amphe bool, halluc bool, nicotin bool, cocaine bool, opioids bool, sedate bool, others bool, polysub bool, inhalant bool, morethan2sud bool ) ''' value = pgIO.commitData(makeNew) if value == True: print("tejas.sud_race_age table has been created") return
def popJoined(logger): popQry = ''' INSERT INTO tejas.sud_race_age ( siteid, backgroundid, age, sex, visit_type, race, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant, morethan2sud) SELECT t1.siteid as siteid, t1.backgroundid as backgroundid, age, sex, visit_type, race, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant, morethan2sud FROM tejas.race_age_t1new t1 INNER JOIN tejas.sud_users_new t2 ON t1.siteid = t2.siteid AND t1.backgroundid = t2.backgroundid ''' value = pgIO.commitData(popQry) if value == True: print("tejas.sud_race_age joined table has been populated") return
def createTest4Table(logger): '''Creates test4 This function creates the table sarah.test4, which contains boolean columns for each mental disorder. Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: create_query = ''' CREATE TABLE sarah.test4( patientid integer, alc bool, cannabis bool, amphe bool, halluc bool, nicotin bool, cocaine bool, opioids bool, sedate bool, others bool, polysub bool, inhalant bool, morethan2sud bool ) ''' print(pgIO.commitData(create_query)) except Exception as e: logger. error('Failed to create test4 table because of {}'.format(e)) return
def createTest3(logger): '''Creates sarah.test3 table Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: query = ''' CREATE TABLE sarah.test3( patientid text, mood bool, anxiety bool, adjustment bool, adhd bool, sud bool, psyc bool, pers bool, childhood bool, impulse bool, cognitive bool, eating bool, smtf bool, disso bool, sleep bool, fd bool )''' value = pgIO.commitData(query) if value == True: print("sarah.test3 table has been successfully created") except Exception as e: logger.error('Unable to create table test3 because {}'.format(e)) return
def cleanUp(logger): dropQuery = SQL(''' DROP TABLE IF EXISTS tejas.restofusers; ''') value = pgIO.commitData(dropQuery) if value == True: print("All tables have been successfully deleted") return
def recursiveQuery(totalRows, recursionChunkSize=1000, scalingFactor=0.1, ttl=5): getFilterString('race', jsonConfig["inputs"]["raceFilterPath"]) getFilterString('sex', jsonConfig["inputs"]["sexFilterPath"]) getFilterString('sex', jsonConfig["inputs"]["sexFilterPath"]) for idx in range(0, totalRows, recursionChunkSize): lowerBound = idx upperBound = idx + recursionChunkSize queryString = ''' INSERT into jingwen.temp2 with cte as ( select *, ROW_NUMBER() OVER (PARTITION BY id, siteid ORDER BY age asc) AS rn from ( select background.id, background.siteid, background.race, background.sex, typepatient.age, typepatient.visit_type from ( select id, siteid, race, sex from raw_data.background where CAST (id as INTEGER) >= {} and CAST (id as INTEGER) < {} and '''.format( lowerBound, upperBound) + getFilterString( 'race', jsonConfig["inputs"]["raceFilterPath"]) + ''' and ''' + getFilterString( 'sex', jsonConfig["inputs"]["sexFilterPath"]) + ''' ) as background inner join ( select backgroundid, siteid, age, visit_type, created from raw_data.typepatient where ''' + getFilterString( 'visit_type', jsonConfig["inputs"]["settingFilterPath"]) + ''' and (age IS NOT NULL ) ) as typepatient on typepatient.backgroundid = background.id and typepatient.siteid = background.siteid )as x ) select id, siteid, race, sex, age, visit_type from cte where rn = 1 '''.format(lowerBound, upperBound) isSuccesfulFlag = pgIO.commitData(queryString, dbName=dbName) print("ID {} to {}: {}".format(lowerBound, upperBound, isSuccesfulFlag)) return True
def alterTable(logger): alter = ''' ALTER TABLE tejas.race_age_t1new ALTER COLUMN age TYPE INTEGER USING (age::integer) ''' value = pgIO.commitData(alter) if value == True: print("successfully altered the age column") return
def subroutineRelabelComorbid(logger): fullTableName = jsonConfig["inputs"]["schemaName"] + "." + jsonConfig[ "inputs"]["tableName"] queryStringList = [] queryStringList += (getRelabelString( 'race', jsonConfig["inputs"]["raceFilterPath"])) queryStringList += (getRelabelString( 'visit_type', jsonConfig["inputs"]["settingFilterPath"])) queryStringList += (getRelabelString( 'sex', jsonConfig["inputs"]["sexFilterPath"])) queryStringList.append(''' ALTER TABLE {} ADD age_categorical text NULL; '''.format(fullTableName)) for item in [[1, 11], [12, 17], [18, 34], [35, 49]]: queryStringList.append(''' UPDATE {} SET age_categorical='{}-{}' WHERE CAST ({}.age AS INTEGER) <= {} and CAST ({}.age AS INTEGER) >= {} '''.format(fullTableName, item[0], item[1], fullTableName, item[1], fullTableName, item[0])) queryStringList.append(''' UPDATE {} SET age_categorical='50+' WHERE CAST ({}.age AS INTEGER) >= 50 '''.format(fullTableName, fullTableName)) queryStringList.append(''' UPDATE {} SET age_categorical='0' WHERE CAST ({}.age AS INTEGER) = 0 '''.format(fullTableName, fullTableName)) for relabelQuery in queryStringList: pgIO.commitData(relabelQuery, dbName=dbName) return
def createrestofusersT3part1(logger): try: query = ''' CREATE TABLE tejas.restofusers_t3_p1( sud bool, race text, age text, sex text, visit_type text )''' value = pgIO.commitData(query) if value == True: print("tejas.restofusers_t3 table has been successfully created") except Exception as e: logger.error('Unable to create table restofusers because {}'.format(e)) return
def poprestofusersT3part1(logger): getQuery = ''' INSERT INTO tejas.restofusers_t3_p1(sud, race, age, sex, visit_type) SELECT t2.sud, t1.race, t1.age, t1.sex, t1.visit_type FROM tejas.race_age_t1new t1 INNER JOIN tejas.restofusers t2 ON t1.siteid = t2.siteid AND t1.backgroundid = t2.backgroundid WHERE (cast (age as int) >= 12) AND (cast (age as int) <= 100) GROUP BY (t1.siteid, t1.backgroundid, t2.sud, t1.race, t1.age, t1.sex, t1.visit_type) ''' value = pgIO.commitData(getQuery) if value == True: print("successfully populated tejas.restofusers_t3") return
def recursiveQuery(totalRows, recursionChunkSize=1000, scalingFactor=0.1, ttl=5, offset=0): for idx in range(offset, offset + totalRows, recursionChunkSize): lowerBound = idx upperBound = idx + recursionChunkSize queryString = ''' INSERT into {0}.temp3 SELECT temp2.*, y.dsmno from {0}.temp2 as temp2 inner join ( select patientid, dsmno from ( select temp2.patientid, {3}.pdiagnose.dsmno from ( select patientid from {0}.temp2 where CAST (patientid as INTEGER) >= {1} and CAST (patientid as INTEGER) < {2} ) as temp2 inner join {3}.pdiagnose on CAST({3}.pdiagnose.patientid as TEXT) = CAST(temp2.patientid as TEXT) ) as x group by patientid, dsmno ) as y on CAST(y.patientid as TEXT) = CAST(temp2.patientid as TEXT) '''.format(schemaName, lowerBound, upperBound, rawSchemaName) isSuccesfulFlag = pgIO.commitData(queryString, dbName=dbName) print("ID {} to {}: {}".format(lowerBound, upperBound, isSuccesfulFlag)) if not isSuccesfulFlag: if ttl > 0 and recursionChunkSize * scalingFactor >= 1: recursiveQuery(upperBound, recursionChunkSize = round(recursionChunkSize*scalingFactor), \ scalingFactor = scalingFactor, \ ttl = ttl-1, \ offset = lowerBound ) return
def delAllFalserestofusers(logger): '''Second filter of users from tejas.restofusers Deletes users who have no target mental disorder diagnoses. Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: query = ''' DELETE FROM tejas.restofusers WHERE mood = false and anxiety = false and adjustment = false and adhd = false and sud = false and psyc = false and pers = false and childhood = false and impulse = false and cognitive = false and eating = false and smtf = false and disso = false and sleep = false and fd = false''' value = pgIO.commitData(query) if value == True: print( "Users with no diagnosis in tejas.restofusers table has been successfully deleted" ) except Exception as e: logger.error( 'Unable to delete from table restofusers because {}'.format(e)) return
def createsud_usersTable(logger): '''Creates sud_users This function creates the table tejas.sud_users, which contains boolean columns for each mental disorder. Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: create_query = ''' CREATE TABLE tejas.sud_users( siteid text, backgroundid text, alc bool, cannabis bool, amphe bool, halluc bool, nicotin bool, cocaine bool, opioids bool, sedate bool, others bool, polysub bool, inhalant bool, morethan2sud bool ) ''' value = pgIO.commitData(create_query) if value == True: print("tejas.sud_users table has been created") except Exception as e: logger.error( 'Failed to create sud_users table because of {}'.format(e)) return
def recursiveQuery(totalRows, recursionChunkSize=1000, scalingFactor=0.1, ttl=5): for idx in range(0, totalRows, recursionChunkSize): lowerBound = idx upperBound = idx + recursionChunkSize queryString = ''' INSERT into jingwen.temp3 SELECT temp2.*, y.dsmno from jingwen.temp2 as temp2 inner join ( select id, dsmno, siteid from ( select temp2.id, temp2.siteid, raw_data.pdiagnose.dsmno from ( select id, siteid from jingwen.temp2 where CAST (id as INTEGER) >= {} and CAST (id as INTEGER) < {} ) as temp2 inner join raw_data.pdiagnose on raw_data.pdiagnose.backgroundid = temp2.id and raw_data.pdiagnose.siteid = temp2.siteid ) as x group by id, dsmno, siteid ) as y on y.id = temp2.id and y.siteid = temp2.siteid '''.format(lowerBound, upperBound) isSuccesfulFlag = pgIO.commitData(queryString, dbName=dbName) print("ID {} to {}: {}".format(lowerBound, upperBound, isSuccesfulFlag)) return True
def recursiveQuery(totalRows, recursionChunkSize=1000, scalingFactor=0.1, ttl=5, offset=0): raceFilter = getFilterString('race', jsonConfig["inputs"]["raceFilterPath"]) sexFilter = getFilterString('sex', jsonConfig["inputs"]["sexFilterPath"], typeCategory='TEXT') settingFilter = getFilterString( 'visit_type', jsonConfig["inputs"]["settingFilterPath"]) for idx in range(offset, offset + totalRows, recursionChunkSize): lowerBound = idx upperBound = idx + recursionChunkSize queryString = ''' INSERT into {0}.temp2 with cte as ( select *, ROW_NUMBER() OVER (PARTITION BY patientid ORDER BY age asc) AS rn from ( select background.patientid, background.race, background.sex, typepatient.age, typepatient.visit_type from ( select patientid, race, sex from {3}.background where CAST (patientid as INTEGER) >= {1} and CAST (patientid as INTEGER) < {2} and race is not null and '''.format( schemaName, lowerBound, upperBound, rawSchemaName) + raceFilter + ''' and ''' + sexFilter + ''' ) as background inner join ( select patientid, age, visit_type from {0}.typepatient where '''.format( rawSchemaName) + settingFilter + ''' and (age IS NOT NULL ) ) as typepatient on typepatient.patientid = background.patientid )as x ) select patientid, race, sex, age, visit_type from cte where rn = 1 ''' isSuccesfulFlag = pgIO.commitData(queryString, dbName=dbName) print("ID {} to {}: {}".format(lowerBound, upperBound, isSuccesfulFlag)) if not isSuccesfulFlag: if ttl > 0 and recursionChunkSize * scalingFactor >= 1: recursiveQuery(upperBound, recursionChunkSize = round(recursionChunkSize*scalingFactor), \ scalingFactor = scalingFactor, \ ttl = ttl-1, \ offset = lowerBound ) return
def addmorethan2sudcolumn(logger): '''Populates the 'morethan2sud' column in sarah.test4 This function counts the number of 'True' for each mental disorder for each user in sarah.test4. If they have more than 1 'True' value, their 'morethan2sud' column will be set to 'True'. Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: query = ''' SELECT t1.patientid, t2.alc, t2.cannabis, t2.amphe, t2.halluc, t2.nicotin, t2.cocaine, t2.opioids, t2.sedate, t2.others, t2.polysub, t2.inhalant FROM sarah.test2 t1 INNER JOIN sarah.test4 t2 ON t1.patientid = t2.patientid ''' data = pgIO.getAllData(query) csvfile = '../data/raw_data/morethan2suduser_keys.csv' with open(csvfile, 'w+') as output: csv_output = csv.writer(output) for row in data: if sum(list(row[1:12])) >= 2: csv_output.writerow(row) output.close() with open(csvfile) as f: readCSV = csv.reader(f, delimiter=",") for user in tqdm(readCSV): updateQuery = ''' UPDATE sarah.test4 SET morethan2sud = True WHERE patientid = {} '''.format(user[0]) print(pgIO.commitData(updateQuery)) # print(type(user[0])) #Update column's null values to false updateQuery2 = ''' UPDATE sarah.test4 SET morethan2sud = False WHERE morethan2sud is null ''' print(pgIO.commitData(updateQuery2)) except Exception as e: logger.error( 'adding morethan2sud column to the databse failed because of {}'. format(e)) return
def popsud_users(logger): '''Populates sud_users This function populates the table tejas.sud_users, which contains boolean columns for each mental disorder. If a user's row has True for that column, it means that he/she has that disorder, and vice versa. Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: with open(all_userkeys) as f: readCSV = csv.reader(f, delimiter=",") for user in tqdm(readCSV): # print(user) popTableQuery = SQL(''' INSERT INTO tejas.sud_users(siteid, backgroundid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant) SELECT siteid, backgroundid, array_agg(distinct cast(dsmno as text)) && array[{}] as alc, array_agg(distinct cast(dsmno as text)) && array[{}] as cannabis, array_agg(distinct cast(dsmno as text)) && array[{}] as amphe, array_agg(distinct cast(dsmno as text)) && array[{}] as halluc, array_agg(distinct cast(dsmno as text)) && array[{}] as nicotin, array_agg(distinct cast(dsmno as text)) && array[{}] as cocaine, array_agg(distinct cast(dsmno as text)) && array[{}] as opioids, array_agg(distinct cast(dsmno as text)) && array[{}] as sedate, array_agg(distinct cast(dsmno as text)) && array[{}] as others, array_agg(distinct cast(dsmno as text)) && array[{}] as polysub, array_agg(distinct cast(dsmno as text)) && array[{}] as inhalant FROM raw_data.pdiagnose WHERE siteid = {} AND backgroundid = {} GROUP BY siteid, backgroundid ''').format( Literal(table2_config["params"]["sudcats"]["alc"]), Literal(table2_config["params"]["sudcats"]["cannabis"]), Literal(table2_config["params"]["sudcats"]["amphe"]), Literal(table2_config["params"]["sudcats"]["halluc"]), Literal(table2_config["params"]["sudcats"]["nicotin"]), Literal(table2_config["params"]["sudcats"]["cocaine"]), Literal(table2_config["params"]["sudcats"]["opioids"]), Literal(table2_config["params"]["sudcats"]["sedate"]), Literal(table2_config["params"]["sudcats"]["others"]), Literal(table2_config["params"]["sudcats"]["polysub"]), Literal(table2_config["params"]["sudcats"]["inhalant"]), Literal(user[0]), Literal(user[1])) value = pgIO.commitData(popTableQuery) except Exception as e: logger.error( 'Failed to populate sud_users table because of {}'.format(e)) return
def main(logger, resultsDict): raceList = pd.read_csv( jsonConfig["inputs"]["raceFilterPath"])['category'].unique() SUDList = pd.read_csv(jsonConfig["inputs"]["dsmSUDPath"]).columns.tolist() diagnosesList = pd.read_csv( jsonConfig["inputs"]["dsmDiagnosesPath"]).columns.tolist() rawSUDList = SUDList rawDiagnosesList = diagnosesList SUDList = [headerParse(item) for item in SUDList] diagnosesList = [headerParse(item) for item in diagnosesList] oneHotDiagnosesQueryString = ''' create table {0}.temp4 as( '''.format( schemaName) + oneHotDiagnoses() + ''' ); ''' joinEverythingQueryString = ''' create table {0}.comorbid as ( select * from ( select {0}.temp2.race, {0}.temp2.sex, {0}.temp2.age, {0}.temp2.visit_type, {0}.temp4.* from {0}.temp4 inner join {0}.temp2 on {0}.temp4.patientid = {0}.temp2.patientid ) as x where CAST (age AS INTEGER) > 0 and ('''.format( schemaName) + getFilterString( 'visit_type', jsonConfig["inputs"]["settingFilterPath"]) + ''') ); ''' fullTableName = schemaName + "." + tableName if not checkTableExistence(schemaName, tableName): print('[preProcessDB] {0}.{1} not found. Generating now.'.format( schemaName, tableName)) print('[preProcessDB] Running queries. This might take a while ...') print('Filter race and join with typepatient ... ', end=" ") subroutineJoinTypepatient() print('done\n') print('Join with pdiagnose ... ', end=" ") subroutineJoinDiagnoses() print('done\n') print('One hot diagnoses and SUD ... ', end=" ") if pgIO.commitData(oneHotDiagnosesQueryString, dbName=dbName): print('done\n') else: print('fail\n') print('Join everything ... ', end=" ") if pgIO.commitData(joinEverythingQueryString, dbName=dbName): print('done\n') else: print('fail\n') print('Relabelling ...', end=" ") subroutineRelabelComorbid() print('done\n') else: print('[preProcessDB] {0}.{1} found. Skipping generation'.format( schemaName, tableName)) genRetrieve = pgIO.getDataIterator("select * from " + fullTableName, dbName=dbName, chunks=100) dbColumnQueryString = ''' SELECT column_name FROM information_schema.columns WHERE table_schema = '{0}' AND table_name = '{1}' '''.format(schemaName, tableName) dbColumns = pgIO.getAllData(dbColumnQueryString, dbName=dbName) dbColumns = [item[0] for item in dbColumns] tempArray = [] for idx, data in enumerate(genRetrieve): tempArray += data print("Chunk: " + str(idx)) rawData = pd.DataFrame(data=tempArray, columns=dbColumns) rawData = relabel(rawData, 'race', jsonConfig["inputs"]["raceFilterPath"]) rawData = relabel(rawData, 'visit_type', jsonConfig["inputs"]["settingFilterPath"]) try: #Save pickle to be sent to 'getUsefulInfo.py' fileObjectSave = open( jsonConfig["outputs"]["intermediatePath"] + "db.pickle", 'wb') miscData = [SUDList, diagnosesList, rawSUDList, rawDiagnosesList] pickle.dump((miscData, rawData), fileObjectSave) fileObjectSave.close() except Exception as e: logger.error(f'Issue saving to pickle: " {e}') return
def popTest4(logger): '''Populates test4 This function populates the table sarah.test4, which contains boolean columns for each mental disorder. If a user's row has True for that column, it means that he/she has that disorder, and vice versa. Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: all_userkeys = "../data/raw_data/SUDUser_keys.csv" with open(all_userkeys) as f: readCSV = csv.reader(f, delimiter=",") for user in tqdm(readCSV): getQuery = SQL(''' SELECT patientid, array_agg(distinct cast(dsmno as text)) && array[{}] as alc, array_agg(distinct cast(dsmno as text)) && array[{}] as cannabis, array_agg(distinct cast(dsmno as text)) && array[{}] as amphe, array_agg(distinct cast(dsmno as text)) && array[{}] as halluc, array_agg(distinct cast(dsmno as text)) && array[{}] as nicotin, array_agg(distinct cast(dsmno as text)) && array[{}] as cocaine, array_agg(distinct cast(dsmno as text)) && array[{}] as opioids, array_agg(distinct cast(dsmno as text)) && array[{}] as sedate, array_agg(distinct cast(dsmno as text)) && array[{}] as others, array_agg(distinct cast(dsmno as text)) && array[{}] as polysub, array_agg(distinct cast(dsmno as text)) && array[{}] as inhalant FROM rwe_version1_1.pdiagnose WHERE patientid = {} GROUP BY patientid ''').format( Literal(table2_config["params"]["sudcats"]["alc"]), Literal(table2_config["params"]["sudcats"]["cannabis"]), Literal(table2_config["params"]["sudcats"]["amphe"]), Literal(table2_config["params"]["sudcats"]["halluc"]), Literal(table2_config["params"]["sudcats"]["nicotin"]), Literal(table2_config["params"]["sudcats"]["cocaine"]), Literal(table2_config["params"]["sudcats"]["opioids"]), Literal(table2_config["params"]["sudcats"]["sedate"]), Literal(table2_config["params"]["sudcats"]["others"]), Literal(table2_config["params"]["sudcats"]["polysub"]), Literal(table2_config["params"]["sudcats"]["inhalant"]), Literal(int(user[0])) ) data = pgIO.getAllData(getQuery) pushQuery = ''' INSERT INTO sarah.test4(patientid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant) VALUES %s ''' deleteDupliQuery = ''' DELETE FROM sarah.test4 a USING ( SELECT MAX(ctid) as ctid, patientid FROM sarah.test4 GROUP BY patientid HAVING count(*) > 1 ) b WHERE a.patientid = b.patientid AND a.ctid <> b.ctid ''' value = pgIO.commitData(deleteDupliQuery) if value == True: print("Duplicate values succesfully deleted") print(pgIO.commitDataList(pushQuery, data)) except Exception as e: logger. error('Failed to populate test4 table because of {}'.format(e)) return
def createTest2(logger): '''Creates the table sarah.test2 This function creates the table sarah.test2 Parameters ---------- logger : {logging.Logger} The logger used for logging error information ''' try: createTableQuery = ''' CREATE TABLE sarah.test2( age integer, visit_type text, sex text, race text, patientid integer )''' value = pgIO.commitData(createTableQuery) if value == True: print("sarah.test2 table has been created") popTableQuery = SQL(''' WITH table_cte (age, setting, sex, race, patientid) as ( SELECT t1.age, t1.visit_type, t2.sex, t2.race, t1.patientid FROM rwe_version1_1.typepatient t1 INNER JOIN rwe_version1_1.background t2 ON t1.patientid = t2.patient WHERE t1.age < 100 AND t1.visit_type in {} AND t2.sex in {} AND t2.race in {} ) INSERT INTO sarah.test2(age, setting, sex, race, patientid) SELECT (array_agg(distinct age))[1] age, setting, sex, race, patientid FROM table_cte GROUP BY setting, sex, race, patientid ''').format(Literal(tuple(table1_config["params"]["setting"]["all"])), Literal(tuple(table1_config["params"]["sexes"]["all"])), Literal(tuple(table1_config["params"]["races"]["all"]))) value = pgIO.commitData(popTableQuery) if value == True: print("sarah.test2 table has been populated") deleteDupliQuery = ''' DELETE FROM sarah.test2 a USING ( SELECT MAX(ctid) as ctid, patientid FROM sarah.test2 GROUP BY patientid HAVING count(*) > 1 ) b WHERE a.patientid = b.patientid AND a.ctid <> b.ctid ''' value = pGIO.commitData(deleteDupliQuery) if value == True: print( "Duplicate values in sarah.test2 has been deleted and the earliest age is taken" ) except Exception as e: logger.error('Failed to generate table {}'.format(e)) return
def popDiagCols(logger): '''Populates sarah.test3 table Using the .csv file of the first filtered users' [siteid, backgroundid], these users' DSM numbers are aggregated into an array and compared against the array of DSM numbers of a specific diagnosis. If the user's DSM numbers are found in the diagnosis array, the column representing the diagnosis is filled in with a "True" value for the user, and vice versa. The columns created are stored in a new table sarah.test3, then the second filter is completed by removing users that have all their diagnosis columns set to "False" (i.e. they have no mental disorder test3 that we have specified) Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' try: all_userkeys = "../data/raw_data/firstfilter_allkeys.csv" with open(all_userkeys) as f: readCSV = csv.reader(f, delimiter=",") for user in tqdm(readCSV): getQuery = SQL(''' SELECT patientid, array_agg(distinct cast(dsmno as text)) && array[{}] as mood, array_agg(distinct cast(dsmno as text)) && array[{}] as anxiety, array_agg(distinct cast(dsmno as text)) && array[{}] as adjustment, array_agg(distinct cast(dsmno as text)) && array[{}] as adhd, array_agg(distinct cast(dsmno as text)) && array[{}] as sud, array_agg(distinct cast(dsmno as text)) && array[{}] as psyc, array_agg(distinct cast(dsmno as text)) && array[{}] as pers, array_agg(distinct cast(dsmno as text)) && array[{}] as childhood, array_agg(distinct cast(dsmno as text)) && array[{}] as impulse, array_agg(distinct cast(dsmno as text)) && array[{}] as cognitive, array_agg(distinct cast(dsmno as text)) && array[{}] as eating, array_agg(distinct cast(dsmno as text)) && array[{}] as smtf, array_agg(distinct cast(dsmno as text)) && array[{}] as disso, array_agg(distinct cast(dsmno as text)) && array[{}] as sleep, array_agg(distinct cast(dsmno as text)) && array[{}] as fd FROM rwe_version1_1.pdiagnose WHERE patientid = {} GROUP BY patientid ''').format( Literal(table1_config["params"]["categories"]["mood"]), Literal(table1_config["params"]["categories"]["anxiety"]), Literal( table1_config["params"]["categories"]["adjustment"]), Literal(table1_config["params"]["categories"]["adhd"]), Literal(table1_config["params"]["categories"]["sud"]), Literal(table1_config["params"]["categories"]["psyc"]), Literal(table1_config["params"]["categories"]["pers"]), Literal( table1_config["params"]["categories"]["childhood"]), Literal(table1_config["params"]["categories"]["impulse"]), Literal( table1_config["params"]["categories"]["cognitive"]), Literal(table1_config["params"]["categories"]["eating"]), Literal(table1_config["params"]["categories"]["smtf"]), Literal(table1_config["params"]["categories"]["disso"]), Literal(table1_config["params"]["categories"]["sleep"]), Literal(table1_config["params"]["categories"]["fd"]), Literal(int(user[0]))) data = pgIO.getAllData(getQuery) pushQuery = ''' INSERT INTO sarah.test3(patientid, mood, anxiety, adjustment, adhd, sud, psyc, pers, childhood, impulse, cognitive, eating, smtf, disso, sleep, fd) VALUES %s ''' print(pgIO.commitDataList(pushQuery, data)) deleteDupliQuery = ''' DELETE FROM sarah.test3 a USING ( SELECT MAX(ctid) as ctid, patientid FROM sarah.test3 GROUP BY patientid HAVING count(*) > 1 ) b WHERE a.patientid = b.patientid AND a.ctid <> b.ctid ''' value = pgIO.commitData(deleteDupliQuery) if value == True: print("Duplicate values succesfully deleted") f.close() except Exception as e: logger.error('Failed to add columns because of {}'.format(e)) return
def popDiagCols(logger): '''Populates tejas.restofusers table Using the .csv file of the first filtered users' [siteid, backgroundid], these users' DSM numbers are aggregated into an array and compared against the array of DSM numbers of a specific diagnosis. If the user's DSM numbers are found in the diagnosis array, the column representing the diagnosis is filled in with a "True" value for the user, and vice versa. The columns created are stored in a new table tejas.restofusers, then the second filter is completed by removing users that have all their diagnosis columns set to "False" (i.e. they have no mental disorder restofusers that we have specified) Decorators: lD.log Arguments: logger {logging.Logger} -- logs error information ''' with open(all_userkeys, 'r') as f: readCSV = csv.reader(f, delimiter=",") for user in tqdm(readCSV): getQuery = SQL(''' INSERT INTO tejas.restofusers(siteid, backgroundid, mood, anxiety, adjustment, adhd, sud, psyc, pers, childhood, impulse, cognitive, eating, smtf, disso, sleep, fd) SELECT siteid, backgroundid, array_agg(distinct cast(dsmno as text)) && array[{}] as mood, array_agg(distinct cast(dsmno as text)) && array[{}] as anxiety, array_agg(distinct cast(dsmno as text)) && array[{}] as adjustment, array_agg(distinct cast(dsmno as text)) && array[{}] as adhd, array_agg(distinct cast(dsmno as text)) && array[{}] as sud, array_agg(distinct cast(dsmno as text)) && array[{}] as psyc, array_agg(distinct cast(dsmno as text)) && array[{}] as pers, array_agg(distinct cast(dsmno as text)) && array[{}] as childhood, array_agg(distinct cast(dsmno as text)) && array[{}] as impulse, array_agg(distinct cast(dsmno as text)) && array[{}] as cognitive, array_agg(distinct cast(dsmno as text)) && array[{}] as eating, array_agg(distinct cast(dsmno as text)) && array[{}] as smtf, array_agg(distinct cast(dsmno as text)) && array[{}] as disso, array_agg(distinct cast(dsmno as text)) && array[{}] as sleep, array_agg(distinct cast(dsmno as text)) && array[{}] as fd FROM raw_data.pdiagnose WHERE siteid = {} AND backgroundid = {} GROUP BY siteid, backgroundid ''').format( Literal(table1_config["params"]["categories"]["mood"]), Literal(table1_config["params"]["categories"]["anxiety"]), Literal(table1_config["params"]["categories"]["adjustment"]), Literal(table1_config["params"]["categories"]["adhd"]), Literal(table1_config["params"]["categories"]["sud"]), Literal(table1_config["params"]["categories"]["psyc"]), Literal(table1_config["params"]["categories"]["pers"]), Literal(table1_config["params"]["categories"]["childhood"]), Literal(table1_config["params"]["categories"]["impulse"]), Literal(table1_config["params"]["categories"]["cognitive"]), Literal(table1_config["params"]["categories"]["eating"]), Literal(table1_config["params"]["categories"]["smtf"]), Literal(table1_config["params"]["categories"]["disso"]), Literal(table1_config["params"]["categories"]["sleep"]), Literal(table1_config["params"]["categories"]["fd"]), Literal(user[0]), Literal(user[1])) value = pgIO.commitData(getQuery) f.close() return
def createrace_t1(logger): '''Creates the table tejas.race_age_t1 This function creates the table tejas.race_age_t1 Parameters ---------- logger : {logging.Logger} The logger used for logging error information ''' try: makeNewQry = ''' CREATE TABLE tejas.race_age_t1 ( age text, sex text, visit_type text, race text, siteid text, backgroundid text ) ''' value = pgIO.commitData(makeNewQry) if value == True: print("tejas.race_age_t1 table has been created") popTableQuery = SQL(''' INSERT INTO tejas.race_age_t1 SELECT t1.age, t2.sex, t1.visit_type, t2.race, t1.siteid, t1.backgroundid FROM raw_data.typepatient t1 INNER JOIN raw_data.background t2 ON t1.backgroundid = t2.id AND t1.siteid = t2.siteid WHERE CAST (t1.age AS INTEGER) < 100 AND t1.visit_type in {} AND t2.sex in {} AND t2.race in {} ''').format(Literal(tuple(table1_config["params"]["settings"]["all"])), Literal(tuple(table1_config["params"]["sexes"]["all"])), Literal(tuple(table1_config["params"]["races"]["all"]))) value = pgIO.commitData(popTableQuery) if value == True: print("tejas.race_age_t1 table has been populated") makeNewQry = ''' CREATE TABLE tejas.race_age_t1new ( age text, sex text, visit_type text, race text, siteid text, backgroundid text ) ''' value = pgIO.commitData(makeNewQry) if value == True: print("tejas.race_age_t1new table has been created") deleteDupliQuery = ''' INSERT INTO tejas.race_age_t1new (age, sex, visit_type, race, siteid, backgroundid) SELECT (array_agg(distinct age))[1] as age, sex, visit_type, race, siteid, backgroundid FROM tejas.race_age_t1 GROUP BY siteid, backgroundid, sex, visit_type, race ''' value = pgIO.commitData(deleteDupliQuery) if value == True: print( "Duplicate values in tejas.race_age_t1 has been deleted and the earliest age is taken" ) except Exception as e: logger.error('Failed to generate table {}'.format(e)) return