def get_visits_by_subject_by_month():
    """Count visits per location, subject, month and year.

    Reads every row of ``dbo.visits``, translates ``location_id`` and
    ``subject_id`` into display values and counts the visits in each
    (location, subject, month, year) group.

    :return: DataFrame with the columns ``location``, ``month``, ``year``,
        ``subject`` and ``visits`` (number of visits in the group)
    """
    query = """
        SELECT
            visits.id as visit_id,
            visits.created_date,
            visits.updated_at,
            visits.location_id,
            visits.device_id,
            visits.subject_id
        from dbo.visits as visits
        ORDER BY updated_at DESC
    """
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Lookup keyed by location id; rows without a location get the text 'NaN'.
    location_mapper = etl.reverse_dict(udb.pd_get_id_map('locations'))
    df['location'] = df['location_id'].apply(
        lambda w: location_mapper[w] if not pd.isnull(w) else 'NaN')

    # Coerce first so that `.dt` also works when the column did not come back
    # with a datetime dtype (e.g. an empty result set).
    created = pd.to_datetime(df['created_date'])
    df['month'] = created.dt.month
    df['year'] = created.dt.year

    subjects_table = get_subjects_table()

    # Number of visits (non-null visit ids) per group.
    subjects = (
        df.groupby(['location', 'subject_id', 'month', 'year'])['visit_id']
        .count()
        .reset_index()
    )

    # Fall back to the raw subject id when the subject is not in the lookup.
    subjects['subject'] = subjects['subject_id'].apply(
        lambda w: subjects_table.get(w, w))

    subjects_visits = subjects[['location', 'month', 'year', 'subject', 'visit_id']]
    return subjects_visits.rename(columns={'visit_id': 'visits'})
def get_next_visit_exam_to_process():
    """Fetch the next visit exam that is waiting to be processed.

    Selects the visit exam with the earliest ``created_date`` that has an
    exam version, is neither processed nor currently processing, and has no
    status set.

    :return: pandas Series with ``id``, ``visit_id``, ``visit_exam_processed``
        and ``created_date`` of that visit exam, or ``None`` when nothing is
        left to process or the query returned more than one row
    """
    query = """
        SELECT id, visit_id, visit_exam_processed, created_date
        FROM dbo.visit_exams
        WHERE exam_version IS NOT NULL
            AND visit_exam_processed = false
            AND visit_exam_processing = false
            AND status IS NULL
        ORDER BY created_date ASC
        LIMIT 1;
    """
    # TODO: add in audio_transcribed=true to sql query

    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # validate the output
    if len(df) == 1:
        return df.iloc[0]

    if len(df) > 1:
        # Should be impossible with LIMIT 1; report it instead of failing on
        # an unassigned result variable.
        logger.error(
            "number of visit exams is greater than one, check the 'get_next_visit_exam_to_process' sql code"
        )
    else:
        logger.info("no visits left to process")
    return None
def _visits_date_filter(timeframe):
    """Build the SQL WHERE clause that restricts ``visits.created_date``."""
    if len(timeframe) == 1:
        start = timeframe[0]
        if start == 'CURRENT_DATE':
            # SQL keyword, so it must not be wrapped in quotes.
            return "WHERE visits.created_date > " + start
        return "WHERE visits.created_date > '" + start + "'"
    if len(timeframe) == 2:
        return ("WHERE visits.created_date BETWEEN '" + timeframe[0]
                + " 00:00:00' AND '" + timeframe[1] + " 23:59:59'")
    raise ValueError("please input a valid date range, timeframe can only be 1 or 2 values")


def get_progress_summary(timeframe, drop_react=True, drop_test=False):
    """
    Gets all of the visits and visit_exams created during the timeframe

    :param timeframe: sequence of one or two date strings.
        One value selects visits created after that date (the literal
        'CURRENT_DATE' is passed to SQL unquoted); two values select visits
        created between the first date 00:00:00 and the second date 23:59:59.
    :param drop_react: when true, drop the rows whose location is
        'Test Site one'
    :param drop_test: when true, drop the rows whose subject first name is
        'Test'
    :return: summary_df, one row per joined visit / visit_exam with the added
        columns ``exam``, ``location`` and ``subject`` ('Test' or 'User');
        ``first_name`` is removed
    :raises ValueError: if timeframe does not hold exactly 1 or 2 values
    """
    # NOTE(review): the date values are interpolated into the SQL text, so
    # only trusted values should be passed in - confirm against callers.
    date_str = _visits_date_filter(timeframe)

    query = """
        SELECT
            visits.id as visit_id,
            dbo.visit_exams.id as visit_exam_id,
            dbo.visit_exams.exam_id,
            dbo.visit_exams.exam_version,
            visits.created_date,
            visits.reports_processed,
            dbo.visit_exams.has_error,
            dbo.visit_exams.error_description,
            visits.s3_folder,
            visits.updated_at,
            visits.location_id,
            visits.device_id,
            dbo.subjects.first_name,
            dbo.subjects.id as subject_id
        from dbo.visits as visits
        LEFT JOIN dbo.visit_exams ON visits.id=dbo.visit_exams.visit_id
        RIGHT JOIN dbo.subjects ON visits.subject_id=dbo.subjects.id
        {}
        ORDER BY updated_at DESC
    """.format(date_str)

    conn = udb.get_conn()
    try:
        summary_df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Lookups keyed by id; rows without an id get the text 'NaN'.
    exam_mapper = etl.reverse_dict(udb.pd_get_id_map('exams'))
    summary_df['exam'] = summary_df['exam_id'].apply(
        lambda w: exam_mapper[w] if not pd.isnull(w) else 'NaN')

    location_mapper = etl.reverse_dict(udb.pd_get_id_map('locations'))
    summary_df['location'] = summary_df['location_id'].apply(
        lambda w: location_mapper[w] if not pd.isnull(w) else 'NaN')

    if drop_react:
        # Copy so the column assignments below act on an independent frame
        # instead of a slice of the query result.
        summary_df = summary_df.loc[summary_df['location'] != 'Test Site one'].copy()

    summary_df['subject'] = np.where(summary_df['first_name'] == 'Test', 'Test', 'User')
    summary_df = summary_df.drop(columns=['first_name'])

    if drop_test:
        summary_df = summary_df.loc[summary_df['subject'] != 'Test']

    return summary_df
def get_visit_exams_left():
    """Return the ids of the visit exams that still have to be processed.

    A visit exam counts as left when it is neither processed nor currently
    processing, has an exam version and has no status set.

    :return: array with the ``id`` values of the matching visit exams
    """
    query = """
        SELECT id
        FROM dbo.visit_exams
        WHERE visit_exam_processed = false
            AND visit_exam_processing = false
            AND exam_version IS NOT NULL
            AND status IS NULL
    """
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()
    return df['id'].values
def get_subjects_table():
    """Build a lookup from subject id to the subject's full name.

    The full name is ``first_name`` and ``last_name`` joined by one space.

    :return: dict mapping ``dbo.subjects.id`` to the full name
    """
    query = """
        SELECT subjects.first_name, subjects.last_name, subjects.id
        from dbo.subjects as subjects
        ORDER BY updated_at DESC
    """
    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # The query result is `df`; the previous code read from an undefined
    # `subjects_df` and therefore raised NameError on every call.
    df['full_name'] = df['first_name'] + ' ' + df['last_name']
    return dict(zip(df['id'], df['full_name']))
def visit_exam_in_progress():
    """Report which visit exam, if any, is currently flagged as processing.

    :return: ``False`` when no visit exam is processing, otherwise the ``id``
        of the first matching row (for one or two matching rows)
    :raises Exception: when more than two visit exams are processing at once
    """
    sql = """ SELECT id FROM dbo.visit_exams WHERE visit_exam_processing = true """

    connection = udb.get_conn()
    in_progress = pd.read_sql(sql, connection)
    connection.close()

    count = in_progress.shape[0]

    # More than two rows in processing is treated as an error.
    if count > 2:
        raise Exception("{} visit_exams getting processed at once".format(
            str(count)))

    if count == 0:
        return False

    return in_progress.iloc[0]['id']
def get_visits_by_location(timeframe):
    """Summarise the visits created after a date, per location and month.

    :param timeframe: sequence whose first element is a date string; only
        visits with ``created_date`` later than that value are selected
    :return: tuple ``(df, vbl)``.
        ``df`` holds one row per query result row with the columns
        ``visit_id``, ``created_date``, ``updated_at``, ``location``,
        ``month``, ``year`` and ``date``.
        ``vbl`` holds the columns ``location``, ``month`` (English month
        name), ``year`` and ``visits`` (row count of the group).
    """
    # NOTE(review): the date value is interpolated into the SQL text, so only
    # trusted values should be passed in - confirm against callers.
    date_str = "WHERE visits.created_date > '" + timeframe[0] + "'"

    query = """
        SELECT
            visits.id as visit_id,
            dbo.visit_exams.id as visit_exam_id,
            dbo.visit_exams.exam_id,
            dbo.visit_exams.exam_version,
            visits.created_date,
            visits.updated_at,
            visits.location_id,
            visits.device_id,
            dbo.subjects.first_name,
            dbo.subjects.last_name,
            dbo.subjects.id as subject_id
        from dbo.visits as visits
        LEFT JOIN dbo.visit_exams ON visits.id=dbo.visit_exams.visit_id
        RIGHT JOIN dbo.subjects ON visits.subject_id=dbo.subjects.id
        {}
        ORDER BY updated_at DESC
    """.format(date_str)

    conn = udb.get_conn()
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()

    # Lookup keyed by location id; rows without a location get the text 'NaN'.
    location_mapper = etl.reverse_dict(udb.pd_get_id_map('locations'))
    df['location'] = df['location_id'].apply(
        lambda w: location_mapper[w] if not pd.isnull(w) else 'NaN')

    # Coerce first so that `.dt` also works when the column did not come back
    # with a datetime dtype (e.g. an empty result set).
    created = pd.to_datetime(df['created_date'])
    df['month'] = created.dt.month
    df['year'] = created.dt.year
    df['date'] = created.dt.date

    df = df[['visit_id', 'created_date', 'updated_at', 'location', 'month', 'year', 'date']]

    # NOTE(review): the query joins visit_exams, so a visit appears once per
    # joined exam row and is counted that many times - confirm this is the
    # intended meaning of "visits".
    vbl = (
        df.groupby(['location', 'month', 'year'])['visit_id']
        .count()
        .reset_index()
        .rename(columns={'visit_id': 'visits'})
    )

    month_dict = {
        1: 'January', 2: 'February', 3: 'March', 4: 'April',
        5: 'May', 6: 'June', 7: 'July', 8: 'August',
        9: 'September', 10: 'October', 11: 'November', 12: 'December',
    }
    # Values that are not a month number are left unchanged.
    vbl['month'] = vbl['month'].apply(lambda w: month_dict.get(w, w))

    return df, vbl