Esempio n. 1
0
def get_visits_by_subject_by_month():
    """Count visits per location, subject, month and year.

    Reads every row of ``dbo.visits``, translates ``location_id`` through the
    reversed ``'locations'`` id map (null ids become the string ``'NaN'``),
    and replaces ``subject_id`` with the name found in the lookup returned by
    ``get_subjects_table`` (ids missing from the lookup are kept as-is).

    :return: DataFrame with columns ``location``, ``month``, ``year``,
        ``subject`` and ``visits`` (row count of the group).
    """
    query = """
         SELECT visits.id as visit_id,
                visits.created_date,
                visits.updated_at,
                visits.location_id,
                visits.device_id,
                visits.subject_id
            from dbo.visits as visits

        ORDER BY updated_at DESC
    """
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    location_mapper = etl.reverse_dict(udb.pd_get_id_map('locations'))
    df['location'] = df['location_id'].apply(
        lambda w: location_mapper[w] if not pd.isnull(w) else 'NaN')
    df['month'] = df['created_date'].dt.month
    df['year'] = df['created_date'].dt.year

    subjects_table = get_subjects_table()

    # count() fills every remaining column with the group's row count;
    # 'visit_id' is the one kept below as the number of visits.
    subjects = (
        df.groupby(['location', 'subject_id', 'month', 'year'])
        .count()
        .reset_index()
    )
    subjects['subject'] = subjects['subject_id'].apply(
        lambda w: subjects_table.get(w, w))

    subjects_visits = subjects[['location', 'month', 'year', 'subject', 'visit_id']]
    return subjects_visits.rename(columns={'visit_id': 'visits'})
Esempio n. 2
0
def get_next_visit_exam_to_process():
    """Fetch the next visit exam waiting to be processed.

    Selects the visit exam with the earliest ``created_date`` that has an
    exam version, is neither processed nor currently processing, and has no
    status set.

    :return: the matching row as a pandas Series (``id``, ``visit_id``,
        ``visit_exam_processed``, ``created_date``), or ``None`` when no row
        qualifies or the query unexpectedly returns more than one row.
    """
    query = """
                SELECT id, visit_id, visit_exam_processed, created_date
                FROM dbo.visit_exams
                WHERE exam_version IS NOT NULL
                AND visit_exam_processed = false
                AND visit_exam_processing = false
                AND status IS NULL

                ORDER BY created_date ASC LIMIT 1;
            """

    # TODO: add in audio_transcribed=true to sql query
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # validate the output
    n_rows = len(df)
    if n_rows == 1:
        return df.iloc[0]

    if n_rows > 1:
        # LIMIT 1 should make this unreachable; report it and return None
        # explicitly instead of failing on an unassigned local.
        logger.error(
            "number of visit exams is greater than one, check the 'get_next_visit_exam_to_process' sql code"
        )
        return None

    logger.info("no visits left to process")
    return None
Esempio n. 3
0
def get_progress_summary(timeframe, drop_react=True, drop_test=False):
    """
    Gets all of the visits and visit_exams created within the given timeframe.

    :param timeframe: sequence of one or two date strings.
        One value: visits created after that date; the literal
        'CURRENT_DATE' is passed to SQL unquoted.
        Two values: visits created between the first date at 00:00:00 and
        the second date at 23:59:59.
    :param drop_react: when truthy, drop rows whose location is 'Test Site one'
    :param drop_test: when truthy, drop rows whose subject first name is 'Test'
    :return: summary_df, one row per joined visit / visit_exam, with added
        'exam', 'location' and 'subject' ('Test' or 'User') columns
    :raises ValueError: when timeframe does not hold exactly 1 or 2 values
    """
    # NOTE(review): the date values are concatenated straight into the SQL
    # text; only pass trusted input until this is converted to a
    # parameterized query for the driver in use.
    if len(timeframe) == 1:
        if timeframe[0] == 'CURRENT_DATE':
            date_str = "WHERE visits.created_date > " + timeframe[0]
        else:
            date_str = "WHERE visits.created_date > '" + timeframe[0] + "'"
    elif len(timeframe) == 2:
        date_str = ("WHERE visits.created_date BETWEEN '" + timeframe[0]
                    + " 00:00:00' AND '" + timeframe[1] + " 23:59:59'")
    else:
        raise ValueError("please input a valid date range, timeframe can only be 1 or 2 values")

    query = """
                 SELECT visits.id as visit_id,
                        dbo.visit_exams.id as visit_exam_id,
                        dbo.visit_exams.exam_id,
                        dbo.visit_exams.exam_version,
                        visits.created_date,
                        visits.reports_processed,
                        dbo.visit_exams.has_error,
                        dbo.visit_exams.error_description,
                        visits.s3_folder,
                        visits.updated_at,
                        visits.location_id,
                        visits.device_id,
                        dbo.subjects.first_name,
                        dbo.subjects.id as subject_id
                    from dbo.visits as visits
                LEFT JOIN dbo.visit_exams ON visits.id=dbo.visit_exams.visit_id
                RIGHT JOIN dbo.subjects ON visits.subject_id=dbo.subjects.id
                {}
                ORDER BY updated_at DESC
    """.format(date_str)

    conn = udb.get_conn()
    try:
        summary_df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Null ids are mapped to the string 'NaN' rather than looked up.
    exam_mapper = etl.reverse_dict(udb.pd_get_id_map('exams'))
    summary_df['exam'] = summary_df['exam_id'].apply(
        lambda w: exam_mapper[w] if not pd.isnull(w) else 'NaN')
    location_mapper = etl.reverse_dict(udb.pd_get_id_map('locations'))
    summary_df['location'] = summary_df['location_id'].apply(
        lambda w: location_mapper[w] if not pd.isnull(w) else 'NaN')

    if drop_react:
        # .copy() so the column assignment below works on an independent
        # frame instead of a slice (avoids SettingWithCopyWarning).
        summary_df = summary_df.loc[summary_df['location'] != 'Test Site one'].copy()

    summary_df['subject'] = np.where(summary_df['first_name'] == 'Test', 'Test', 'User')
    summary_df = summary_df.drop(columns=['first_name'])

    if drop_test:
        summary_df = summary_df.loc[summary_df['subject'] != 'Test'].copy()

    return summary_df
Esempio n. 4
0
def get_visit_exams_left():
    """Return the ids of all visit exams still waiting to be processed.

    A visit exam qualifies when it has an exam version, is neither processed
    nor currently processing, and has no status set.

    :return: numpy array holding the ``id`` column of the matching rows
    """
    query = """
                SELECT id FROM dbo.visit_exams
                WHERE visit_exam_processed = false
                AND visit_exam_processing = false
                AND exam_version IS NOT NULL
                AND status IS NULL

            """
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return df['id'].values
Esempio n. 5
0
def get_subjects_table():
    """Build a lookup from subject id to the subject's full name.

    The full name is ``first_name`` and ``last_name`` from ``dbo.subjects``
    joined by a single space.

    :return: dict mapping subject ``id`` to ``"<first_name> <last_name>"``
    """
    query = """
             SELECT subjects.first_name,
                    subjects.last_name,
                    subjects.id
                from dbo.subjects as subjects
            ORDER BY updated_at DESC
            """
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Build the name from the frame that was just read (the previous code
    # referenced an undefined variable here).
    df['full_name'] = df['first_name'] + ' ' + df['last_name']
    subjects_lookup_table = dict(zip(df['id'], df['full_name']))

    return subjects_lookup_table
Esempio n. 6
0
def visit_exam_in_progress():
    """Report whether a visit exam is currently flagged as processing.

    :return: ``False`` when no visit exam has ``visit_exam_processing`` set;
        otherwise the ``id`` of the first returned row when one or two rows
        are flagged.
    :raises Exception: when more than two visit exams are flagged at once
    """
    query = """
                SELECT id FROM dbo.visit_exams
                WHERE visit_exam_processing = true
            """
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Close in finally so a failing query does not leak the connection.
        conn.close()

    nprocessing = len(df)

    if nprocessing == 0:
        return False
    if nprocessing in (1, 2):
        # Up to two concurrent rows are tolerated; only the first id is
        # reported.
        return df.iloc[0]['id']
    raise Exception(
        "{} visit_exams getting processed at once".format(nprocessing))
Esempio n. 7
0
def get_visits_by_location(timeframe):
    """Collect visits created after a date and count them per location/month.

    :param timeframe: sequence whose first element is a date string; only
        visits with ``created_date`` later than that value are selected.
    :return: tuple ``(df, vbl)`` where ``df`` holds the selected rows with
        columns ``visit_id``, ``created_date``, ``updated_at``, ``location``,
        ``month``, ``year``, ``date``; and ``vbl`` holds ``location``,
        ``month`` (as an English month name), ``year`` and ``visits`` (row
        count of the group).
    """
    # NOTE(review): the date value is concatenated straight into the SQL
    # text; only pass trusted input until this is converted to a
    # parameterized query for the driver in use.
    date_str = "WHERE visits.created_date > '" + timeframe[0] + "'"
    query = """
         SELECT visits.id as visit_id,
                dbo.visit_exams.id as visit_exam_id,
                dbo.visit_exams.exam_id,
                dbo.visit_exams.exam_version,
                visits.created_date,
                visits.updated_at,
                visits.location_id,
                visits.device_id,
                dbo.subjects.first_name,
                dbo.subjects.last_name,
                dbo.subjects.id as subject_id
            from dbo.visits as visits
        LEFT JOIN dbo.visit_exams ON visits.id=dbo.visit_exams.visit_id
        RIGHT JOIN dbo.subjects ON visits.subject_id=dbo.subjects.id
        {}
        ORDER BY updated_at DESC
    """.format(date_str)
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Null location ids are mapped to the string 'NaN' rather than looked up.
    location_mapper = etl.reverse_dict(udb.pd_get_id_map('locations'))
    df['location'] = df['location_id'].apply(
        lambda w: location_mapper[w] if not pd.isnull(w) else 'NaN')
    df['month'] = df['created_date'].dt.month
    df['year'] = df['created_date'].dt.year
    df['date'] = df['created_date'].dt.date
    df = df[['visit_id', 'created_date', 'updated_at', 'location', 'month', 'year', 'date']]

    # NOTE(review): the query joins visit_exams, so a visit with several
    # exams presumably appears once per exam and is counted that many
    # times here -- confirm whether that is intended.
    vbl = df.groupby(['location', 'month', 'year']).count().reset_index()
    vbl = vbl.drop(['created_date', 'updated_at', 'date'], axis=1)
    vbl = vbl.rename(columns={'visit_id': 'visits'})

    month_dict = {1: 'January', 2: 'February', 3: 'March', 4: 'April',
                  5: 'May', 6: 'June', 7: 'July', 8: 'August',
                  9: 'September', 10: 'October', 11: 'November', 12: 'December'}

    # Unknown month numbers are left unchanged.
    vbl['month'] = vbl['month'].apply(lambda w: month_dict.get(w, w))

    return df, vbl