Beispiel #1
0
def delAllFalseSUDusers(logger):
    makeNewQry = '''
    CREATE TABLE tejas.sud_users_new (
        siteid text,
        backgroundid text,
        alc bool,
        cannabis bool,
        amphe bool,
        halluc bool,
        nicotin bool,
        cocaine bool,
        opioids bool,
        sedate bool,
        others bool,
        polysub bool,
        inhalant bool,
        morethan2sud bool
    )
    '''
    value = pgIO.commitData(makeNewQry)
    if value == True:
        print("tejas.sud_users_new table has been created")

    deleteDupliQuery = '''
    INSERT INTO tejas.sud_users_new (siteid, backgroundid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant)
    SELECT
        siteid,
        backgroundid,
        alc,
        cannabis,
        amphe,
        halluc,
        nicotin,
        cocaine,
        opioids,
        sedate,
        others,
        polysub,
        inhalant
    FROM
        tejas.sud_users
    GROUP BY
        siteid,
        backgroundid,
        alc,
        cannabis,
        amphe,
        halluc,
        nicotin,
        cocaine,
        opioids,
        sedate,
        others,
        polysub,
        inhalant
    '''
    value = pgIO.commitData(deleteDupliQuery)
    if value == True:
        print("Duplicate values succesfully deleted")
    return
Beispiel #2
0
def addmorethan2sudcolumn(logger):
    '''Populates the 'morethan2sud' column in tejas.sud_race_age

    This function counts the number of 'True' for each mental disorder
    for each user in tejas.sud_race_age. If they have more than 1 'True' value,
    their 'morethan2sud' column will be set to 'True'.

    Decorators:
        lD.log

    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        query = '''
        SELECT
            siteid, backgroundid, alc, cannabis, amphe, halluc, nicotin,
            cocaine, opioids, sedate, others, polysub, inhalant
        FROM tejas.sud_race_age
        '''
        data = pgIO.getAllData(query)

        csvfile = '../data/raw_data/atleast2suduser_keys.csv'

        count = 0
        output = open(csvfile, 'w+')
        csv_output = csv.writer(output)
        for row in data:
            if sum(list(row[2:])) >= 2:
                csv_output.writerow(row)
        readCSV = csv.reader(open(csvfile), delimiter=",")
        for user in tqdm(readCSV):
            updateQuery = SQL('''
            UPDATE tejas.sud_race_age
            SET morethan2sud = true
            WHERE siteid = {}
            AND backgroundid = {}
            ''').format(Literal(user[0]), Literal(str(user[1])))
            value = pgIO.commitData(updateQuery)
            # print(type(user[0]))

        #Update column's null values to false
        updateQuery2 = '''
        UPDATE tejas.sud_race_age
        SET morethan2sud = false
        WHERE morethan2sud is null
        '''
        print(pgIO.commitData(updateQuery2))


    except Exception as e:
        logger.error('adding morethan2sud column to the database failed because of {}'.format(e))

    return
Beispiel #3
0
def relabelVar(logger):
    '''Relabels column values
    
    This function relabels Race and Settings values to standardised values.
    
    Decorators:
        lD.log
    
    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        # relabel_sex_success = []
        # for sex in table1_config["inputs"]["sexes"]:
        #     sex_query = SQL('''
        #     UPDATE sarah.test2
        #     SET sex = {}
        #     WHERE sex in {}
        #     ''').format(
        #         Literal(sex),
        #         Literal(tuple(table1_config["params"]["sexes"][sex]))
        #         )
        #     relabel_sex_success.append(pgIO.commitData(sex_query))
        # if False in relabel_sex_success:
        #     print("Relabelling sex not successful!")

        relabel_race_success = []
        for race in table1_config["inputs"]["races"]:
            race_query = SQL('''
            UPDATE sarah.test2
            SET race = {}
            WHERE race in {}
            ''').format(Literal(race),
                        Literal(tuple(table1_config["params"]["races"][race])))
            relabel_race_success.append(pgIO.commitData(race_query))
        if False in relabel_race_success:
            print("Relabelling race not successful!")

        relabel_setting_success = []
        for setting in table1_config["inputs"]["settings"]:
            setting_query = SQL('''
            UPDATE sarah.test2
            SET visit_type = {}
            WHERE visit_type in {}
            ''').format(
                Literal(setting),
                Literal(tuple(table1_config["params"]["settings"][setting])))
            relabel_setting_success.append(pgIO.commitData(setting_query))
        if False in relabel_setting_success:
            print("Relabelling setting not successful!")

    except Exception as e:
        logger.error('Failed to update table test2 because {}'.format(e))
def createTable(logger,
                schemaName,
                tableName,
                createTableQueryString,
                existsTableQueryString=''):

    if checkTableExistence(schemaName, tableName):
        if existsTableQueryString != '':
            pgIO.commitData(existsTableQueryString, dbName=dbName)
        return False
    else:
        pgIO.commitData(createTableQueryString, dbName=dbName)
        return True
Beispiel #5
0
def createJoined(logger):
    makeNew = '''
    CREATE TABLE tejas.sud_race_age (
    siteid text,
    backgroundid text,
    age text,
    sex text,
    visit_type text,
    race text,
    alc bool,
    cannabis bool,
    amphe bool,
    halluc bool,
    nicotin bool,
    cocaine bool,
    opioids bool,
    sedate bool,
    others bool,
    polysub bool,
    inhalant bool,
    morethan2sud bool
    )
    '''
    value = pgIO.commitData(makeNew)
    if value == True:
        print("tejas.sud_race_age table has been created")
    return
Beispiel #6
0
def popJoined(logger):
    popQry = '''
    INSERT INTO tejas.sud_race_age (
    siteid, backgroundid, age, sex, visit_type, race, alc, cannabis, amphe,
    halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant,
    morethan2sud)
    SELECT
        t1.siteid as siteid,
        t1.backgroundid as backgroundid,
        age, sex, visit_type, race,
        alc,
        cannabis,
        amphe,
        halluc,
        nicotin,
        cocaine,
        opioids,
        sedate,
        others,
        polysub,
        inhalant,
        morethan2sud
    FROM
        tejas.race_age_t1new t1
    INNER JOIN
        tejas.sud_users_new t2
    ON
        t1.siteid = t2.siteid
    AND
        t1.backgroundid = t2.backgroundid
    '''
    value = pgIO.commitData(popQry)
    if value == True:
        print("tejas.sud_race_age joined table has been populated")
    return
Beispiel #7
0
def createTest4Table(logger):
    '''Creates test4
    
    This function creates the table sarah.test4, which contains boolean columns 
    for each mental disorder.
    
    Decorators:
        lD.log
    
    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        create_query = '''
        CREATE TABLE sarah.test4(
        patientid integer,
        alc bool,
        cannabis bool,
        amphe bool,
        halluc bool,
        nicotin bool,
        cocaine bool,
        opioids bool,
        sedate bool,
        others bool,
        polysub bool,
        inhalant bool,
        morethan2sud bool
        )
        '''
        print(pgIO.commitData(create_query))

    except Exception as e:
        logger. error('Failed to create test4 table because of {}'.format(e))
    return
Beispiel #8
0
def createTest3(logger):
    '''Creates sarah.test3 table
    
    Decorators:
        lD.log
    
    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        query = '''
        CREATE TABLE sarah.test3(
            patientid text,
            mood bool,
            anxiety bool,
            adjustment bool,
            adhd bool,
            sud bool,
            psyc bool,
            pers bool,
            childhood bool,
            impulse bool, 
            cognitive bool,
            eating bool,
            smtf bool,
            disso bool,
            sleep bool,
            fd bool
            )'''
        value = pgIO.commitData(query)
        if value == True:
            print("sarah.test3 table has been successfully created")
    except Exception as e:
        logger.error('Unable to create table test3 because {}'.format(e))
    return
Beispiel #9
0
def cleanUp(logger):
    dropQuery = SQL('''
    DROP TABLE IF EXISTS tejas.restofusers;
    ''')
    value = pgIO.commitData(dropQuery)
    if value == True:
        print("All tables have been successfully deleted")
    return
Beispiel #10
0
    def recursiveQuery(totalRows,
                       recursionChunkSize=1000,
                       scalingFactor=0.1,
                       ttl=5):

        getFilterString('race', jsonConfig["inputs"]["raceFilterPath"])
        getFilterString('sex', jsonConfig["inputs"]["sexFilterPath"])
        getFilterString('sex', jsonConfig["inputs"]["sexFilterPath"])

        for idx in range(0, totalRows, recursionChunkSize):
            lowerBound = idx
            upperBound = idx + recursionChunkSize

            queryString = '''
                                INSERT into jingwen.temp2
                                with cte as
                                (
                                select *,
                                ROW_NUMBER() OVER (PARTITION BY id, siteid ORDER BY age asc) AS rn
                                from 
                                (
                                    select background.id, background.siteid, background.race, background.sex, 
                                    typepatient.age, typepatient.visit_type
                                    from
                                    (
                                        select id, siteid, race, sex from raw_data.background 
                                        where CAST (id as INTEGER) >= {} and CAST (id as INTEGER) < {}
                                        and
                                        '''.format(
                lowerBound, upperBound) + getFilterString(
                    'race', jsonConfig["inputs"]["raceFilterPath"]) + '''
                                        and
                                        ''' + getFilterString(
                        'sex', jsonConfig["inputs"]["sexFilterPath"]) + '''
                                    ) as background
                                    inner join 
                                    (
                                        select backgroundid, siteid, age, visit_type, created from raw_data.typepatient
                                        where 
                                        ''' + getFilterString(
                            'visit_type',
                            jsonConfig["inputs"]["settingFilterPath"]) + '''
                                        and (age IS NOT NULL )
                                    ) as typepatient
                                    on typepatient.backgroundid = background.id and typepatient.siteid = background.siteid
                                )as x
                                )
                                select id, siteid, race, sex, age, visit_type
                                from cte
                                where rn = 1    
                                                  
                                '''.format(lowerBound, upperBound)

            isSuccesfulFlag = pgIO.commitData(queryString, dbName=dbName)
            print("ID {} to {}: {}".format(lowerBound, upperBound,
                                           isSuccesfulFlag))

        return True
Beispiel #11
0
def alterTable(logger):
    alter = '''
    ALTER TABLE tejas.race_age_t1new
    ALTER COLUMN age TYPE INTEGER USING (age::integer)
    '''
    value = pgIO.commitData(alter)
    if value == True:
        print("successfully altered the age column")
    return
Beispiel #12
0
def subroutineRelabelComorbid(logger):

    fullTableName = jsonConfig["inputs"]["schemaName"] + "." + jsonConfig[
        "inputs"]["tableName"]

    queryStringList = []
    queryStringList += (getRelabelString(
        'race', jsonConfig["inputs"]["raceFilterPath"]))
    queryStringList += (getRelabelString(
        'visit_type', jsonConfig["inputs"]["settingFilterPath"]))
    queryStringList += (getRelabelString(
        'sex', jsonConfig["inputs"]["sexFilterPath"]))

    queryStringList.append('''
    ALTER TABLE {}
    ADD age_categorical text NULL;                                         
                                            '''.format(fullTableName))

    for item in [[1, 11], [12, 17], [18, 34], [35, 49]]:
        queryStringList.append('''
        UPDATE {}
        SET age_categorical='{}-{}'
        WHERE CAST ({}.age AS INTEGER) <= {} and CAST ({}.age AS INTEGER) >= {}
        '''.format(fullTableName, item[0], item[1], fullTableName, item[1],
                   fullTableName, item[0]))

    queryStringList.append('''
    UPDATE {}
    SET age_categorical='50+'
    WHERE CAST ({}.age AS INTEGER) >= 50
                                            '''.format(fullTableName,
                                                       fullTableName))

    queryStringList.append('''
    UPDATE {}
    SET age_categorical='0'
    WHERE CAST ({}.age AS INTEGER) = 0
                                            '''.format(fullTableName,
                                                       fullTableName))
    for relabelQuery in queryStringList:
        pgIO.commitData(relabelQuery, dbName=dbName)

    return
Beispiel #13
0
def createrestofusersT3part1(logger):
    try:
        query = '''
        CREATE TABLE tejas.restofusers_t3_p1(
            sud bool,
            race text,
            age text,
            sex text,
            visit_type text
        )'''
        value = pgIO.commitData(query)
        if value == True:
            print("tejas.restofusers_t3 table has been successfully created")
    except Exception as e:
        logger.error('Unable to create table restofusers because {}'.format(e))
    return
Beispiel #14
0
def poprestofusersT3part1(logger):
    getQuery = '''
    INSERT INTO tejas.restofusers_t3_p1(sud, race, age, sex, visit_type)
    SELECT t2.sud, t1.race, t1.age, t1.sex, t1.visit_type
    FROM tejas.race_age_t1new t1
    INNER JOIN tejas.restofusers t2
    ON t1.siteid = t2.siteid
    AND t1.backgroundid = t2.backgroundid
    WHERE (cast (age as int) >= 12)
    AND (cast (age as int) <= 100)
    GROUP BY (t1.siteid, t1.backgroundid, t2.sud, t1.race, t1.age, t1.sex,
    t1.visit_type)
    '''
    value = pgIO.commitData(getQuery)
    if value == True:
        print("successfully populated tejas.restofusers_t3")
    return
Beispiel #15
0
    def recursiveQuery(totalRows,
                       recursionChunkSize=1000,
                       scalingFactor=0.1,
                       ttl=5,
                       offset=0):

        for idx in range(offset, offset + totalRows, recursionChunkSize):
            lowerBound = idx
            upperBound = idx + recursionChunkSize

            queryString = '''
                                INSERT into {0}.temp3
                                SELECT temp2.*, y.dsmno
                                from {0}.temp2 as temp2
                                inner join
                                (
                                    select  patientid, dsmno
                                    from    
                                    (
                                        select temp2.patientid, {3}.pdiagnose.dsmno
                                        from
                                        (
                                            select patientid from {0}.temp2
                                            where CAST  (patientid as INTEGER) >= {1} and CAST (patientid as INTEGER) < {2}
                                        ) as temp2
                                        inner join {3}.pdiagnose 
                                        on CAST({3}.pdiagnose.patientid as TEXT) = CAST(temp2.patientid as TEXT)
                                    ) as x
                                    group by patientid, dsmno
                                ) as y
                                on CAST(y.patientid as TEXT) = CAST(temp2.patientid as TEXT)
                                '''.format(schemaName, lowerBound, upperBound,
                                           rawSchemaName)

            isSuccesfulFlag = pgIO.commitData(queryString, dbName=dbName)
            print("ID {} to {}: {}".format(lowerBound, upperBound,
                                           isSuccesfulFlag))

            if not isSuccesfulFlag:
                if ttl > 0 and recursionChunkSize * scalingFactor >= 1:
                    recursiveQuery(upperBound,   recursionChunkSize = round(recursionChunkSize*scalingFactor), \
                                                 scalingFactor = scalingFactor, \
                                                 ttl = ttl-1, \
                                                 offset = lowerBound )

        return
Beispiel #16
0
def delAllFalserestofusers(logger):
    '''Second filter of users from tejas.restofusers

    Deletes users who have no target mental disorder diagnoses.

    Decorators:
        lD.log

    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        query = '''
        DELETE FROM
            tejas.restofusers
        WHERE
            mood = false and
            anxiety = false and
            adjustment = false and
            adhd = false and
            sud = false and
            psyc = false and
            pers = false and
            childhood = false and
            impulse = false and
            cognitive = false and
            eating = false and
            smtf = false and
            disso = false and
            sleep = false and
            fd = false'''
        value = pgIO.commitData(query)
        if value == True:
            print(
                "Users with no diagnosis in tejas.restofusers table has been successfully deleted"
            )
    except Exception as e:
        logger.error(
            'Unable to delete from table restofusers because {}'.format(e))
    return
Beispiel #17
0
def createsud_usersTable(logger):
    '''Creates sud_users

    This function creates the table tejas.sud_users, which contains boolean columns
    for each mental disorder.

    Decorators:
        lD.log

    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        create_query = '''
        CREATE TABLE tejas.sud_users(
        siteid text,
        backgroundid text,
        alc bool,
        cannabis bool,
        amphe bool,
        halluc bool,
        nicotin bool,
        cocaine bool,
        opioids bool,
        sedate bool,
        others bool,
        polysub bool,
        inhalant bool,
        morethan2sud bool
        )
        '''
        value = pgIO.commitData(create_query)
        if value == True:
            print("tejas.sud_users table has been created")

    except Exception as e:
        logger.error(
            'Failed to create sud_users table because of {}'.format(e))
    return
Beispiel #18
0
    def recursiveQuery(totalRows,
                       recursionChunkSize=1000,
                       scalingFactor=0.1,
                       ttl=5):

        for idx in range(0, totalRows, recursionChunkSize):
            lowerBound = idx
            upperBound = idx + recursionChunkSize

            queryString = '''
                                INSERT into jingwen.temp3
                                SELECT temp2.*, y.dsmno
                                from jingwen.temp2 as temp2
                                inner join
                                (
                                    select  id, dsmno, siteid
                                    from    
                                    (
                                        select temp2.id, temp2.siteid, raw_data.pdiagnose.dsmno
                                        from
                                        (
                                            select id, siteid from jingwen.temp2
                                            where CAST (id as INTEGER) >= {} and CAST (id as INTEGER) < {}
                                        ) as temp2
                                        inner join raw_data.pdiagnose 
                                        on raw_data.pdiagnose.backgroundid = temp2.id and raw_data.pdiagnose.siteid = temp2.siteid
                                    ) as x
                                    group by id, dsmno, siteid
                                ) as y
                                on y.id = temp2.id and y.siteid = temp2.siteid
                                '''.format(lowerBound, upperBound)

            isSuccesfulFlag = pgIO.commitData(queryString, dbName=dbName)
            print("ID {} to {}: {}".format(lowerBound, upperBound,
                                           isSuccesfulFlag))

        return True
Beispiel #19
0
    def recursiveQuery(totalRows,
                       recursionChunkSize=1000,
                       scalingFactor=0.1,
                       ttl=5,
                       offset=0):

        raceFilter = getFilterString('race',
                                     jsonConfig["inputs"]["raceFilterPath"])
        sexFilter = getFilterString('sex',
                                    jsonConfig["inputs"]["sexFilterPath"],
                                    typeCategory='TEXT')
        settingFilter = getFilterString(
            'visit_type', jsonConfig["inputs"]["settingFilterPath"])

        for idx in range(offset, offset + totalRows, recursionChunkSize):
            lowerBound = idx
            upperBound = idx + recursionChunkSize

            queryString = '''
                                INSERT into {0}.temp2
                                with cte as
                                (
                                select *,
                                ROW_NUMBER() OVER (PARTITION BY patientid ORDER BY age asc) AS rn
                                from 
                                (
                                    select background.patientid, background.race, background.sex, 
                                    typepatient.age, typepatient.visit_type
                                    from
                                    (
                                        select patientid, race, sex from {3}.background 
                                        where CAST (patientid as INTEGER) >= {1} and CAST (patientid as INTEGER) < {2}
                                        and
                                        race is not null
                                        and
                                        '''.format(
                schemaName, lowerBound, upperBound,
                rawSchemaName) + raceFilter + '''
                                        and
                                        ''' + sexFilter + '''
                                    ) as background
                                    inner join 
                                    (
                                        select patientid, age, visit_type from {0}.typepatient
                                        where 
                                        '''.format(
                    rawSchemaName) + settingFilter + '''
                                        and (age IS NOT NULL )
                                    ) as typepatient
                                    on typepatient.patientid = background.patientid
                                )as x
                                )
                                select patientid, race, sex, age, visit_type
                                from cte
                                where rn = 1    
                                                  
                                '''

            isSuccesfulFlag = pgIO.commitData(queryString, dbName=dbName)
            print("ID {} to {}: {}".format(lowerBound, upperBound,
                                           isSuccesfulFlag))

            if not isSuccesfulFlag:
                if ttl > 0 and recursionChunkSize * scalingFactor >= 1:
                    recursiveQuery(upperBound,   recursionChunkSize = round(recursionChunkSize*scalingFactor), \
                                                 scalingFactor = scalingFactor, \
                                                 ttl = ttl-1, \
                                                 offset = lowerBound )
        return
Beispiel #20
0
def addmorethan2sudcolumn(logger):
    '''Populates the 'morethan2sud' column in sarah.test4
    
    This function counts the number of 'True' for each mental disorder 
    for each user in sarah.test4. If they have more than 1 'True' value,
    their 'morethan2sud' column will be set to 'True'.
    
    Decorators:
        lD.log
    
    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        query = '''
        SELECT 
            t1.patientid,
            t2.alc,
            t2.cannabis,
            t2.amphe,
            t2.halluc,
            t2.nicotin,
            t2.cocaine,
            t2.opioids,
            t2.sedate,
            t2.others,
            t2.polysub,
            t2.inhalant
        FROM
            sarah.test2 t1
        INNER JOIN 
            sarah.test4 t2
        ON
            t1.patientid = t2.patientid
        '''

        data = pgIO.getAllData(query)

        csvfile = '../data/raw_data/morethan2suduser_keys.csv'

        with open(csvfile, 'w+') as output:
            csv_output = csv.writer(output)

            for row in data:
                if sum(list(row[1:12])) >= 2:
                    csv_output.writerow(row)
        output.close()

        with open(csvfile) as f:
            readCSV = csv.reader(f, delimiter=",")

            for user in tqdm(readCSV):
                updateQuery = '''
                UPDATE 
                    sarah.test4
                SET 
                    morethan2sud = True
                WHERE
                   patientid = {}
                '''.format(user[0])
                print(pgIO.commitData(updateQuery))

                # print(type(user[0]))

        #Update column's null values to false
        updateQuery2 = '''
        UPDATE 
            sarah.test4
        SET 
            morethan2sud = False
        WHERE
           morethan2sud is null
        '''
        print(pgIO.commitData(updateQuery2))

    except Exception as e:
        logger.error(
            'adding morethan2sud column to the databse failed because of {}'.
            format(e))

    return
Beispiel #21
0
def popsud_users(logger):
    '''Populates sud_users

    This function populates the table tejas.sud_users, which contains boolean columns
    for each mental disorder. If a user's row has True for that column, it means
    that he/she has that disorder, and vice versa.

    Decorators:
        lD.log

    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        with open(all_userkeys) as f:
            readCSV = csv.reader(f, delimiter=",")

            for user in tqdm(readCSV):
                # print(user)
                popTableQuery = SQL('''
                INSERT INTO
                    tejas.sud_users(siteid, backgroundid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant)
                SELECT
                    siteid,
                    backgroundid,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as alc,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as cannabis,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as amphe,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as halluc,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as nicotin,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as cocaine,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as opioids,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as sedate,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as others,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as polysub,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as inhalant
                FROM
                    raw_data.pdiagnose
                WHERE
                    siteid = {}
                AND
                    backgroundid = {}
                GROUP BY
                    siteid, backgroundid
                ''').format(
                    Literal(table2_config["params"]["sudcats"]["alc"]),
                    Literal(table2_config["params"]["sudcats"]["cannabis"]),
                    Literal(table2_config["params"]["sudcats"]["amphe"]),
                    Literal(table2_config["params"]["sudcats"]["halluc"]),
                    Literal(table2_config["params"]["sudcats"]["nicotin"]),
                    Literal(table2_config["params"]["sudcats"]["cocaine"]),
                    Literal(table2_config["params"]["sudcats"]["opioids"]),
                    Literal(table2_config["params"]["sudcats"]["sedate"]),
                    Literal(table2_config["params"]["sudcats"]["others"]),
                    Literal(table2_config["params"]["sudcats"]["polysub"]),
                    Literal(table2_config["params"]["sudcats"]["inhalant"]),
                    Literal(user[0]), Literal(user[1]))
                value = pgIO.commitData(popTableQuery)
    except Exception as e:
        logger.error(
            'Failed to populate sud_users table because of {}'.format(e))
    return
Beispiel #22
0
def main(logger, resultsDict):

    raceList = pd.read_csv(
        jsonConfig["inputs"]["raceFilterPath"])['category'].unique()
    SUDList = pd.read_csv(jsonConfig["inputs"]["dsmSUDPath"]).columns.tolist()
    diagnosesList = pd.read_csv(
        jsonConfig["inputs"]["dsmDiagnosesPath"]).columns.tolist()

    rawSUDList = SUDList
    rawDiagnosesList = diagnosesList

    SUDList = [headerParse(item) for item in SUDList]
    diagnosesList = [headerParse(item) for item in diagnosesList]

    oneHotDiagnosesQueryString = '''
                                            create table {0}.temp4 as(
                                            '''.format(
        schemaName) + oneHotDiagnoses() + '''
                                            );
                                            '''

    joinEverythingQueryString = '''
                                            create table {0}.comorbid as
                                            (
                                            select * from
                                            (
                                            select {0}.temp2.race, {0}.temp2.sex, {0}.temp2.age, {0}.temp2.visit_type, {0}.temp4.*
                                            from {0}.temp4
                                            inner join {0}.temp2
                                            on {0}.temp4.patientid = {0}.temp2.patientid
                                            ) as x
                                            where CAST (age AS INTEGER) > 0
                                            and ('''.format(
        schemaName) + getFilterString(
            'visit_type', jsonConfig["inputs"]["settingFilterPath"]) + ''')
                                            );
                                            '''

    fullTableName = schemaName + "." + tableName

    if not checkTableExistence(schemaName, tableName):

        print('[preProcessDB] {0}.{1} not found. Generating now.'.format(
            schemaName, tableName))

        print('[preProcessDB] Running queries. This might take a while ...')

        print('Filter race and join with typepatient ... ', end=" ")
        subroutineJoinTypepatient()
        print('done\n')

        print('Join with pdiagnose ... ', end=" ")
        subroutineJoinDiagnoses()
        print('done\n')

        print('One hot diagnoses and SUD ... ', end=" ")
        if pgIO.commitData(oneHotDiagnosesQueryString, dbName=dbName):
            print('done\n')
        else:
            print('fail\n')

        print('Join everything ... ', end=" ")
        if pgIO.commitData(joinEverythingQueryString, dbName=dbName):
            print('done\n')
        else:
            print('fail\n')

        print('Relabelling ...', end=" ")
        subroutineRelabelComorbid()
        print('done\n')

    else:

        print('[preProcessDB] {0}.{1} found. Skipping generation'.format(
            schemaName, tableName))

    genRetrieve = pgIO.getDataIterator("select * from " + fullTableName,
                                       dbName=dbName,
                                       chunks=100)

    dbColumnQueryString = '''
                                SELECT column_name
                                FROM information_schema.columns
                                WHERE table_schema = '{0}'
                                AND table_name = '{1}'
                                '''.format(schemaName, tableName)

    dbColumns = pgIO.getAllData(dbColumnQueryString, dbName=dbName)

    dbColumns = [item[0] for item in dbColumns]

    tempArray = []
    for idx, data in enumerate(genRetrieve):
        tempArray += data
        print("Chunk: " + str(idx))

    rawData = pd.DataFrame(data=tempArray, columns=dbColumns)

    rawData = relabel(rawData, 'race', jsonConfig["inputs"]["raceFilterPath"])
    rawData = relabel(rawData, 'visit_type',
                      jsonConfig["inputs"]["settingFilterPath"])

    try:  #Save pickle to be sent to 'getUsefulInfo.py'
        fileObjectSave = open(
            jsonConfig["outputs"]["intermediatePath"] + "db.pickle", 'wb')
        miscData = [SUDList, diagnosesList, rawSUDList, rawDiagnosesList]
        pickle.dump((miscData, rawData), fileObjectSave)
        fileObjectSave.close()

    except Exception as e:
        logger.error(f'Issue saving to pickle: " {e}')

    return
Beispiel #23
0
def popTest4(logger):
    '''Populates test4
    
    This function populates the table sarah.test4, which contains boolean columns 
    for each mental disorder. If a user's row has True for that column, it means
    that he/she has that disorder, and vice versa. 
    
    Decorators:
        lD.log
    
    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:

        all_userkeys = "../data/raw_data/SUDUser_keys.csv"

        with open(all_userkeys) as f:
            readCSV = csv.reader(f, delimiter=",")

            for user in tqdm(readCSV):

                getQuery = SQL('''
                SELECT
                    patientid,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as alc,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as cannabis,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as amphe,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as halluc,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as nicotin,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as cocaine,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as opioids,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as sedate,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as others,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as polysub,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as inhalant
                FROM
                    rwe_version1_1.pdiagnose
                WHERE
                    patientid = {}
                GROUP BY
                    patientid
                ''').format(
                    Literal(table2_config["params"]["sudcats"]["alc"]),
                    Literal(table2_config["params"]["sudcats"]["cannabis"]),
                    Literal(table2_config["params"]["sudcats"]["amphe"]),
                    Literal(table2_config["params"]["sudcats"]["halluc"]),
                    Literal(table2_config["params"]["sudcats"]["nicotin"]),
                    Literal(table2_config["params"]["sudcats"]["cocaine"]),
                    Literal(table2_config["params"]["sudcats"]["opioids"]),
                    Literal(table2_config["params"]["sudcats"]["sedate"]),
                    Literal(table2_config["params"]["sudcats"]["others"]),
                    Literal(table2_config["params"]["sudcats"]["polysub"]),
                    Literal(table2_config["params"]["sudcats"]["inhalant"]),
                    Literal(int(user[0]))
                )

                data = pgIO.getAllData(getQuery)

                pushQuery = '''
                INSERT INTO 
                    sarah.test4(patientid, alc, cannabis, amphe, halluc, nicotin, cocaine, opioids, sedate, others, polysub, inhalant)
                VALUES
                    %s
                '''

                deleteDupliQuery = '''
                DELETE FROM sarah.test4 a USING (
                    SELECT MAX(ctid) as ctid, patientid
                    FROM sarah.test4
                    GROUP BY patientid HAVING count(*) > 1
                    ) b
                WHERE a.patientid = b.patientid
                AND a.ctid <> b.ctid
                '''
                value = pgIO.commitData(deleteDupliQuery)
                if value == True:
                    print("Duplicate values succesfully deleted")

                print(pgIO.commitDataList(pushQuery, data))


    except Exception as e:
        logger. error('Failed to populate test4 table because of {}'.format(e))
    return
Beispiel #24
0
def createTest2(logger):
    '''Creates the table sarah.test2
    
    This function creates the table sarah.test2
    
    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information
    '''

    try:
        createTableQuery = '''
        CREATE TABLE sarah.test2(
            age integer,
            visit_type text,
            sex text,
            race text, 
            patientid integer
        )'''

        value = pgIO.commitData(createTableQuery)
        if value == True:
            print("sarah.test2 table has been created")

        popTableQuery = SQL('''
        WITH table_cte (age, setting, sex, race, patientid)
        as
        (
        SELECT 
            t1.age,
            t1.visit_type, 
            t2.sex,
            t2.race,
            t1.patientid
        FROM 
            rwe_version1_1.typepatient t1
        INNER JOIN 
            rwe_version1_1.background t2
        ON
            t1.patientid = t2.patient
        WHERE 
            t1.age < 100
        AND
            t1.visit_type in {}
        AND
            t2.sex in {}
        AND
            t2.race in {}
        )
        INSERT INTO sarah.test2(age, setting, sex, race, patientid)
        SELECT 
            (array_agg(distinct age))[1] age,
            setting, 
            sex, 
            race, 
            patientid
        FROM 
            table_cte
        GROUP BY
            setting, sex, race, patientid
        ''').format(Literal(tuple(table1_config["params"]["setting"]["all"])),
                    Literal(tuple(table1_config["params"]["sexes"]["all"])),
                    Literal(tuple(table1_config["params"]["races"]["all"])))

        value = pgIO.commitData(popTableQuery)
        if value == True:
            print("sarah.test2 table has been populated")

        deleteDupliQuery = '''
        DELETE FROM sarah.test2 a USING (
        SELECT MAX(ctid) as ctid, patientid
        FROM sarah.test2
        GROUP BY patientid HAVING count(*) > 1
        ) b
        WHERE a.patientid = b.patientid
        AND a.ctid <> b.ctid
        '''
        value = pGIO.commitData(deleteDupliQuery)
        if value == True:
            print(
                "Duplicate values in sarah.test2 has been deleted and the earliest age is taken"
            )

    except Exception as e:
        logger.error('Failed to generate table {}'.format(e))
    return
Beispiel #25
0
def popDiagCols(logger):
    '''Populates sarah.test3 table
    
    Using the .csv file of the first filtered users' [siteid, backgroundid], these users' DSM numbers are aggregated into an array and compared against the array of DSM numbers of a specific diagnosis.
    If the user's DSM numbers are found in the diagnosis array, the column representing the diagnosis is filled in with a "True" value for the user, and vice versa.
    The columns created are stored in a new table sarah.test3, then the second filter is completed by removing users that have all their diagnosis columns set to "False" (i.e. they have no mental disorder test3 that we have specified) 
    
    Decorators:
        lD.log
    
    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    try:
        all_userkeys = "../data/raw_data/firstfilter_allkeys.csv"

        with open(all_userkeys) as f:
            readCSV = csv.reader(f, delimiter=",")

            for user in tqdm(readCSV):

                getQuery = SQL('''
                SELECT
                    patientid,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as mood,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as anxiety,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as adjustment,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as adhd,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as sud,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as psyc,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as pers,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as childhood,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as impulse,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as cognitive,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as eating,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as smtf,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as disso,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as sleep,
                    array_agg(distinct cast(dsmno as text)) && array[{}] as fd
                FROM
                    rwe_version1_1.pdiagnose
                WHERE
                    patientid = {}
                GROUP BY
                    patientid
                ''').format(
                    Literal(table1_config["params"]["categories"]["mood"]),
                    Literal(table1_config["params"]["categories"]["anxiety"]),
                    Literal(
                        table1_config["params"]["categories"]["adjustment"]),
                    Literal(table1_config["params"]["categories"]["adhd"]),
                    Literal(table1_config["params"]["categories"]["sud"]),
                    Literal(table1_config["params"]["categories"]["psyc"]),
                    Literal(table1_config["params"]["categories"]["pers"]),
                    Literal(
                        table1_config["params"]["categories"]["childhood"]),
                    Literal(table1_config["params"]["categories"]["impulse"]),
                    Literal(
                        table1_config["params"]["categories"]["cognitive"]),
                    Literal(table1_config["params"]["categories"]["eating"]),
                    Literal(table1_config["params"]["categories"]["smtf"]),
                    Literal(table1_config["params"]["categories"]["disso"]),
                    Literal(table1_config["params"]["categories"]["sleep"]),
                    Literal(table1_config["params"]["categories"]["fd"]),
                    Literal(int(user[0])))

                data = pgIO.getAllData(getQuery)

                pushQuery = '''
                INSERT INTO 
                    sarah.test3(patientid, mood, anxiety, adjustment, adhd, sud, psyc, pers, childhood, impulse, cognitive, eating, smtf, disso, sleep, fd)
                VALUES
                    %s
                '''
                print(pgIO.commitDataList(pushQuery, data))

                deleteDupliQuery = '''
                DELETE FROM sarah.test3 a USING (
                    SELECT MAX(ctid) as ctid, patientid
                    FROM sarah.test3
                    GROUP BY patientid HAVING count(*) > 1
                    ) b
                WHERE a.patientid = b.patientid
                AND a.ctid <> b.ctid
                '''
                value = pgIO.commitData(deleteDupliQuery)
                if value == True:
                    print("Duplicate values succesfully deleted")
        f.close()

    except Exception as e:
        logger.error('Failed to add columns because of {}'.format(e))
    return
Beispiel #26
0
def popDiagCols(logger):
    '''Populates tejas.restofusers table

    Using the .csv file of the first filtered users' [siteid, backgroundid], these users' DSM numbers are aggregated into an array and compared against the array of DSM numbers of a specific diagnosis.
    If the user's DSM numbers are found in the diagnosis array, the column representing the diagnosis is filled in with a "True" value for the user, and vice versa.
    The columns created are stored in a new table tejas.restofusers, then the second filter is completed by removing users that have all their diagnosis columns set to "False" (i.e. they have no mental disorder restofusers that we have specified)

    Decorators:
        lD.log

    Arguments:
        logger {logging.Logger} -- logs error information
    '''
    with open(all_userkeys, 'r') as f:
        readCSV = csv.reader(f, delimiter=",")

        for user in tqdm(readCSV):
            getQuery = SQL('''
            INSERT INTO
                tejas.restofusers(siteid,
                backgroundid,
                mood,
                anxiety,
                adjustment,
                adhd,
                sud,
                psyc,
                pers,
                childhood,
                impulse,
                cognitive,
                eating,
                smtf,
                disso,
                sleep,
                fd)
            SELECT
                siteid,
                backgroundid,
                array_agg(distinct cast(dsmno as text)) && array[{}] as mood,
                array_agg(distinct cast(dsmno as text)) && array[{}] as anxiety,
                array_agg(distinct cast(dsmno as text)) && array[{}] as adjustment,
                array_agg(distinct cast(dsmno as text)) && array[{}] as adhd,
                array_agg(distinct cast(dsmno as text)) && array[{}] as sud,
                array_agg(distinct cast(dsmno as text)) && array[{}] as psyc,
                array_agg(distinct cast(dsmno as text)) && array[{}] as pers,
                array_agg(distinct cast(dsmno as text)) && array[{}] as childhood,
                array_agg(distinct cast(dsmno as text)) && array[{}] as impulse,
                array_agg(distinct cast(dsmno as text)) && array[{}] as cognitive,
                array_agg(distinct cast(dsmno as text)) && array[{}] as eating,
                array_agg(distinct cast(dsmno as text)) && array[{}] as smtf,
                array_agg(distinct cast(dsmno as text)) && array[{}] as disso,
                array_agg(distinct cast(dsmno as text)) && array[{}] as sleep,
                array_agg(distinct cast(dsmno as text)) && array[{}] as fd
            FROM
                raw_data.pdiagnose
            WHERE
                siteid = {}
            AND
                backgroundid = {}
            GROUP BY
                siteid, backgroundid
            ''').format(
                Literal(table1_config["params"]["categories"]["mood"]),
                Literal(table1_config["params"]["categories"]["anxiety"]),
                Literal(table1_config["params"]["categories"]["adjustment"]),
                Literal(table1_config["params"]["categories"]["adhd"]),
                Literal(table1_config["params"]["categories"]["sud"]),
                Literal(table1_config["params"]["categories"]["psyc"]),
                Literal(table1_config["params"]["categories"]["pers"]),
                Literal(table1_config["params"]["categories"]["childhood"]),
                Literal(table1_config["params"]["categories"]["impulse"]),
                Literal(table1_config["params"]["categories"]["cognitive"]),
                Literal(table1_config["params"]["categories"]["eating"]),
                Literal(table1_config["params"]["categories"]["smtf"]),
                Literal(table1_config["params"]["categories"]["disso"]),
                Literal(table1_config["params"]["categories"]["sleep"]),
                Literal(table1_config["params"]["categories"]["fd"]),
                Literal(user[0]), Literal(user[1]))
            value = pgIO.commitData(getQuery)
    f.close()
    return
Beispiel #27
0
def createrace_t1(logger):
    '''Creates the table tejas.race_age_t1

    This function creates the table tejas.race_age_t1

    Parameters
    ----------
    logger : {logging.Logger}
        The logger used for logging error information
    '''

    try:
        makeNewQry = '''
        CREATE TABLE tejas.race_age_t1 (
            age text,
            sex text,
            visit_type text,
            race text,
            siteid text,
            backgroundid text
        )
        '''

        value = pgIO.commitData(makeNewQry)
        if value == True:
            print("tejas.race_age_t1 table has been created")

        popTableQuery = SQL('''
        INSERT INTO tejas.race_age_t1
        SELECT
        	t1.age,
            t2.sex,
        	t1.visit_type,
        	t2.race,
        	t1.siteid,
        	t1.backgroundid
    	FROM
        	raw_data.typepatient t1
    	INNER JOIN
        	raw_data.background t2
    	ON
        	t1.backgroundid = t2.id
    	AND
        	t1.siteid = t2.siteid
    	WHERE
        	CAST (t1.age AS INTEGER) < 100
    	AND
        	t1.visit_type in {}
    	AND
        	t2.sex in {}
    	AND
        	t2.race in {}
    	''').format(Literal(tuple(table1_config["params"]["settings"]["all"])),
                 Literal(tuple(table1_config["params"]["sexes"]["all"])),
                 Literal(tuple(table1_config["params"]["races"]["all"])))

        value = pgIO.commitData(popTableQuery)
        if value == True:
            print("tejas.race_age_t1 table has been populated")
        makeNewQry = '''
        CREATE TABLE tejas.race_age_t1new (
            age text,
            sex text,
            visit_type text,
            race text,
            siteid text,
            backgroundid text
        )
        '''

        value = pgIO.commitData(makeNewQry)
        if value == True:
            print("tejas.race_age_t1new table has been created")
        deleteDupliQuery = '''
        INSERT INTO tejas.race_age_t1new (age, sex, visit_type, race, siteid, backgroundid)
        SELECT
            (array_agg(distinct age))[1] as age,
            sex,
            visit_type,
            race,
            siteid,
            backgroundid
        FROM
            tejas.race_age_t1
        GROUP BY
        	siteid,
        	backgroundid,
        	sex,
        	visit_type,
        	race
        '''
        value = pgIO.commitData(deleteDupliQuery)
        if value == True:
            print(
                "Duplicate values in tejas.race_age_t1 has been deleted and the earliest age is taken"
            )

    except Exception as e:
        logger.error('Failed to generate table {}'.format(e))
    return