def import_data_by_record(path_to_api_creds=None):
    """Load study and daily-report data from Castor, record by record.

    Alternative for import_data for when that function fails due to
    server-side timeout errors (i.e. for large datasets): the Castor client
    loops over the records and report instances to load the data.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from Castor are saved by
            the user. When None, 'local_private_path' from the
            [CastorCredentials] section of user_settings.ini (located next
            to this module) is used. See also:
            https://helpdesk.castoredc.com/article/124-application-programming-interface-api

    Returns:
        The five objects returned by
        ``Castor_api.records_reports_all(report_names=['Daily'])``, in order
        (df_study, df_structure_study, df_report, df_structure_report,
        df_optiongroups_structure). Rows of df_study and df_report whose
        'Record Id' belongs to the test institute or to an archived
        (deleted) record are dropped.
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'user_settings.ini'))
    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    c = Castor_api(path_to_api_creds)  # e.g. in user dir outside of GIT repo

    # select the study named in the settings file
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    (df_study, df_structure_study, df_report, df_structure_report,
     df_optiongroups_structure) = c.records_reports_all(report_names=['Daily'])

    # Collect the ids of records that must not end up in the output:
    # everything in the test institute plus all archived (deleted) records.
    # Only the first institute with 'test' in its name is used; a study
    # without such an institute is handled instead of raising IndexError.
    test_inst = next(
        (inst for inst in c.request_institutes()
         if 'test' in inst['name'].lower()),
        None)
    excluded_records = []
    if test_inst is not None:
        excluded_records += [
            r['record_id']
            for r in c.request_study_records(
                institute=test_inst['institute_id'])
        ]
    excluded_records += [
        r['record_id'] for r in c.request_study_records()
        if r['archived'] == 1
    ]

    df_study.drop(
        index=df_study[df_study['Record Id'].isin(excluded_records)].index,
        inplace=True)
    df_report.drop(
        index=df_report[df_report['Record Id'].isin(excluded_records)].index,
        inplace=True)

    return (df_study, df_structure_study, df_report, df_structure_report,
            df_optiongroups_structure)
def get_units(cols_input):
    """Return the unit name belonging to each entry of ``cols_input``.

    Connects to the Castor API to fetch the option groups and the study
    structure. For every entry of ``cols_input`` that is a key of the
    ``numeric_vars`` mapping returned by ``get_unit_lookup_dict()``, the
    option with a conversion factor of 1.0 is looked up and its
    'Option Name' is used as the unit.

    Args:
        cols_input: column names; passed to ``pd.Series``.

    Returns:
        list with one entry per element of ``cols_input``: the unit name
        when one was found, otherwise an empty string.
    """
    # settings file: create this once and never upload it
    settings = configparser.ConfigParser()
    settings.read('../user_settings.ini')
    castor_settings = settings['CastorCredentials']

    api = Castor_api(castor_settings['local_private_path'])
    api.select_study_by_name(castor_settings['study_name'])
    optiongroups = api.request_study_export_optiongroups()
    studystruct = api.request_study_export_structure()

    cols = pd.Series(cols_input)
    units = pd.Series(cols_input)
    units[:] = ''  # default: no unit known

    lookup_dict, numeric_vars = get_unit_lookup_dict()

    for variable in cols.to_list():
        if variable not in numeric_vars:
            continue
        unit_field = numeric_vars[variable]
        for option_value, factor in lookup_dict[unit_field].items():
            # the option with 1.0 as conversion factor is used
            if not (factor == 1.0):
                continue
            is_unit_field = studystruct['Field Variable Name'] == unit_field
            group_ids = studystruct['Field Option Group'][is_unit_field]
            in_group = optiongroups['Option Group Id'] == group_ids.values[0]
            group_options = optiongroups[['Option Name',
                                          'Option Value']][in_group]
            has_value = (
                group_options['Option Value'].values.astype(int)
                == option_value)
            unit_names = group_options['Option Name'][has_value]
            units[cols == variable] = unit_names.values[0]

    return units.to_list()
#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ check_institute_status.py summarizes the data of a center Created on Thu Mar 26 21:51:39 2020 @author: wouterpotters """ import time, statistics from castor_api import Castor_api c = Castor_api( '/Users/wouterpotters/Desktop/') # e.g. in user dir outside of GIT repo # get study ID for COVID study study_id = c.request_study_id('COVID')[0] center = 'AUMC - VUmc' center = 'AUMC - AMC' center = 'MUMC' records = c.request_study_records(study_id) count = (len([ x['_embedded']['institute']['name'] for x in records if x['_embedded']['institute']['name'] == center and x['archived'] == False ])) completion_rate = [ x['progress'] for x in records if x['_embedded']['institute']['name'] == center and x['archived'] == False ] completion_rate_100 = sum([
""" update_slack_AMC.py creates an update every 10 minutes (if data has been added) Created on Thu Mar 26 21:51:39 2020 @author: wouterpotters """ import time, statistics, os, site, sys site.addsitedir( './../') # add directory to path to enable import of castor_api from castor_api import Castor_api # put both the secret, client and the tokens_slack file here location_castor_slack_api_data = '/Users/wouterpotters/Desktop/' c = Castor_api( location_castor_slack_api_data) # e.g. in user dir outside of GIT repo # get study ID for COVID study study_id = c.request_study_id('COVID')[0] # Posting to a Slack channel def send_message_to_slack(text): from urllib import request import json post = {"text": "{0}".format(text)} try: json_data = json.dumps(post) # the tokens_slack file should contain the full URL with the token to submit data to slack
config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini')) # the excel file with all variables and answer options is stored here target_excel = config['exportresults']['excel_file_variables'] # folder with all figures figure_dir = config['exportresults']['figures_folder'] # # Get all data from Castor database (without any selection criterium) # Note that you need export rights for every individual center. if False: study_data, study_struct, reports_data, reports_struct, optiongroups_struct = covid19_import.import_data_by_record( config['CastorCredentials']['local_private_path']) # get progression for each record c = Castor_api(config['CastorCredentials']['local_private_path']) c.select_study_by_name(config['CastorCredentials']['study_name']) records = pd.DataFrame(c.request_study_records()) progress = records['progress'] study_data['progress'] = progress study_data_orig = study_data with open( str( os.path.join(config['CastorCredentials']['local_private_path'], 'objs.pkl')), 'wb') as f: # Python 3: open(..., 'wb') pickle.dump([ study_data_orig, study_struct, reports_data, reports_struct, optiongroups_struct ], f)
# -*- coding: utf-8 -*- """ Created on Fri Apr 3 12:46:07 2020 @author: wouterpotters """ import site site.addsitedir( './../') # add directory to path to enable import of castor_api from castor_api import Castor_api import configparser config = configparser.ConfigParser() config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini')) c = Castor_api(config['CastorCredentials']['local_private_path']) study_id = c.select_study_by_name(config['CastorCredentials']['study_name']) varname = 'Outcome' # select AMC + VUmc + MUMC institutes = c.request_institutes() inst_amc_vumc = [ inst['institute_id'] for inst in c.request_institutes() if (inst['name'] == 'AUMC - AMC' or inst['name'] == 'AUMC - VUmc' or inst['name'] == 'MUMC') ] records = c.request_study_records( institute=inst_amc_vumc[0]) + c.request_study_records( institute=inst_amc_vumc[1]) options = c.request_fieldoptiongroup(
# -*- coding: utf-8 -*- """ check_institute_status.py summarizes the data of a center Created on Thu Mar 26 21:51:39 2020 @author: wouterpotters """ import time, statistics from castor_api import Castor_api import configparser config = configparser.ConfigParser() config.read('../user_settings.ini') c = Castor_api(config['CastorCredentials'] ['local_private_path']) # e.g. in user dir outside of GIT repo # get study ID for COVID study study_id = c.select_study_by_name(config['CastorCredentials']['study_name']) center = 'AUMC - VUmc' center = 'AUMC - AMC' center = 'MUMC' records = c.request_study_records() count = (len([ x['_embedded']['institute']['name'] for x in records if x['_embedded']['institute']['name'] == center and x['archived'] == False ])) completion_rate = [ x['progress'] for x in records
def import_study_report_structure(path_to_api_creds=None):
    """Fetch the study structure, daily-report structure and option groups.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from Castor are saved by
            the user. When None, 'local_private_path' from the
            [CastorCredentials] section of user_settings.ini (located next
            to this module) is used. See also:
            https://helpdesk.castoredc.com/article/124-application-programming-interface-api

    Returns:
        tuple ``(study_structure_filtered, reports_structure_filtered,
        optiongroups_struct)``:
        * study_structure_filtered: structure rows with Form Type 'Study';
        * reports_structure_filtered: structure rows with Form Type 'Report'
          and Form Collection Name 'Daily case record form';
        * optiongroups_struct: the option group export as returned by the
          Castor client.
        Both structure frames only hold the selected columns, only fields
        of the listed field types, and only fields that have a
        'Field Variable Name'.
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'user_settings.ini'))
    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    c = Castor_api(path_to_api_creds)  # e.g. in user dir outside of GIT repo

    # select the study named in the settings file
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    ### STEP 0: collect answer options from optiongroups
    optiongroups_struct = c.request_study_export_optiongroups()

    ### STEP 1: collect the main study structure (i.e. questions)
    study_structure = c.request_study_export_structure()

    # columns that are kept; all other columns are unused
    structure_columns = [
        'Form Type', 'Form Collection Name', 'Form Collection Order',
        'Form Name', 'Form Order', 'Field Variable Name', 'Field Label',
        'Field ID', 'Field Type', 'Field Order', 'Calculation Template',
        'Field Option Group',
    ]
    # sort on form collection order and field order
    # (this matches how data is filled)
    sort_keys = ['Form Order', 'Form Collection Name',
                 'Form Collection Order', 'Field Order']
    # other datatypes are (most of the time) unusable for the ML model;
    # i.e. custom entries
    usable_field_types = ['radio', 'date', 'dropdown', 'checkbox', 'string',
                          'numeric', 'calculation', 'time']

    structure = study_structure \
        .filter(structure_columns, axis=1) \
        .sort_values(sort_keys)

    # shared selection for study and report fields: usable datatype and a
    # Field Variable Name present (fields without one: additional remarks
    # by user?)
    usable = structure[structure['Field Type'].isin(usable_field_types)
                       & ~structure['Field Variable Name'].isna()]

    # keep only study forms; reports can exist multiple times and should be
    # summarized.
    study_structure_filtered = usable[usable['Form Type'].isin(['Study'])]

    # report variables: only the daily case record form
    reports_structure_filtered = usable[
        usable['Form Type'].isin(['Report'])
        & usable['Form Collection Name'].isin(['Daily case record form'])]

    return (study_structure_filtered, reports_structure_filtered,
            optiongroups_struct)
def import_data(path_to_api_creds=None):
    """Download the full study export from Castor and reshape it.

    Settings are read from user_settings.ini next to this module and from
    user_settings.ini in the current working directory; when both exist the
    values of the latter take precedence. Create this file once using
    covid19_createconfig and never upload it to git.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from Castor are saved by
            the user. When None, 'local_private_path' from the
            [CastorCredentials] section of the settings is used. See also:
            https://helpdesk.castoredc.com/article/124-application-programming-interface-api

    Returns:
        tuple ``(study_data_final, study_structure_filtered,
        reports_data_final, reports_structure_filtered,
        optiongroups_struct)``:
        * study_data_final: one row per record, one column per study
          variable; 'Record ID' is the named index;
        * study_structure_filtered: structure rows of the study variables;
        * reports_data_final: one row per daily report instance, with
          'Record ID' and 'assessment_dt' as first columns ('Record ID' can
          not be the index because multiple entries per record exist);
        * reports_structure_filtered: structure rows of the daily report
          variables;
        * optiongroups_struct: the option group export as returned by the
          Castor client.

    Raises:
        ValueError: when the report data lacks one of the columns
            'Record ID', 'assessment_dt' or 'Form Instance ID'.
    """
    ### STEP 0: connect to API
    config = configparser.ConfigParser()
    config.read([
        os.path.join(os.path.dirname(__file__), 'user_settings.ini'),
        'user_settings.ini',
    ])
    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    c = Castor_api(path_to_api_creds)  # e.g. in user dir outside of GIT repo

    # select the study named in the settings
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    ### STEP 0: collect answer options from optiongroups
    optiongroups_struct = c.request_study_export_optiongroups()

    ### STEP 1: collect data from study
    # get the main study structure (i.e. questions)
    study_structure = c.request_study_export_structure()

    # columns that are kept; all other columns are unused
    structure_columns = [
        'Form Type', 'Form Collection Name', 'Form Collection Order',
        'Form Name', 'Form Order', 'Field Variable Name', 'Field Label',
        'Field ID', 'Field Type', 'Field Order', 'Calculation Template',
        'Field Option Group',
    ]
    # sort on form collection order and field order
    # (this matches how data is filled)
    sort_keys = ['Form Order', 'Form Collection Name',
                 'Form Collection Order', 'Field Order']
    # other datatypes are (most of the time) unusable for the ML model;
    # i.e. custom entries
    usable_field_types = ['radio', 'date', 'dropdown', 'checkbox', 'string',
                          'numeric', 'calculation', 'time']

    structure = study_structure \
        .filter(structure_columns, axis=1) \
        .sort_values(sort_keys)

    # shared selection for study and report fields: usable datatype and a
    # Field Variable Name present (fields without one: additional remarks
    # by user?)
    usable_structure = structure[
        structure['Field Type'].isin(usable_field_types)
        & ~structure['Field Variable Name'].isna()]

    # keep only study forms; reports can exist multiple times and should be
    # summarized.
    study_structure_filtered = usable_structure[
        usable_structure['Form Type'].isin(['Study'])]

    # Get study data
    study_data = c.request_study_export_data()
    study_data['Record ID'] = study_data['Record ID'].astype(str)

    # Archived data (=DELETED data) and all patients from the test institute
    # (=TEST patient) are excluded from both study and report data.
    is_real_record = (~study_data['Record ID'].str.match('^ARCHIVED-.*')) \
        & (~study_data['Record ID'].str.match('000001'))

    # Only keep study entries here; reports and complications are repeated
    # measures that should be summarized first.
    study_data_filtered = study_data[study_data['Form Type'].isin(['Study'])
                                     & is_real_record] \
        .filter(['Record ID', 'Field ID', 'Form Type', 'Value', 'Date'],
                axis=1)

    # combine study data (patients and values) and study structure
    # (variables)
    study_data_final = pandas.merge(
        study_structure_filtered[['Field Variable Name', 'Field ID']],
        study_data_filtered[['Record ID', 'Value', 'Field ID']],
        on='Field ID') \
        .pivot(index='Record ID', columns='Field Variable Name',
               values='Value')

    ### STEP 2A: collect data from DAILY reports
    # get raw data without deleted and test data, ignore junk form instances
    reports_data_filtered = study_data[
        study_data['Form Type'].isin(['Report']) & is_real_record]
    reports_data_filtered = reports_data_filtered[
        ~reports_data_filtered['Form Instance ID'].isna()]

    # problem: daily reports are dynamic, changing over time. As are their
    #          ID's. On top of that people can rename the form.
    # solution: look for all reports that start with 'Daily' and find their
    #           Form Instance ID. Then use that to select all reports.
    # na=False: a missing form instance name counts as "not a daily report"
    # instead of producing an unusable (NaN-containing) boolean mask.
    has_daily_name = reports_data_filtered['Form Instance Name'] \
        .str.match('^Daily .*', na=False)
    daily_report_form_instance_IDs = \
        reports_data_filtered['Form Instance ID'][has_daily_name].unique()
    reports_data_filtered = reports_data_filtered[
        reports_data_filtered['Form Instance ID']
        .isin(daily_report_form_instance_IDs)]
    reports_data_filtered = reports_data_filtered.filter(
        ['Record ID', 'Field ID', 'Form Type', 'Form Instance ID',
         'Form Instance Name', 'Value', 'Date'])

    # report variables: only the daily case record form
    reports_structure_filtered = usable_structure[
        usable_structure['Form Type'].isin(['Report'])
        & usable_structure['Form Collection Name']
        .isin(['Daily case record form'])]

    # merge the structure and the data to get full dataset
    reports_data_all = pandas.merge(
        reports_structure_filtered[['Field Variable Name', 'Field ID']],
        reports_data_filtered[['Record ID', 'Value', 'Form Instance ID',
                               'Field ID']],
        on='Field ID') \
        .pivot(index='Form Instance ID', columns='Field Variable Name',
               values='Value')

    # Record ID has vanished in the pivot; now add Record ID again.
    # (probably smarter to do this using pivot_table)
    reports_data_all = pandas.merge(
        reports_data_all,
        reports_data_filtered[['Record ID', 'Form Instance ID']],
        on='Form Instance ID') \
        .drop_duplicates()

    # reorganize data to put record id and assessment date in front;
    # Form Instance ID is dropped, it is not needed.
    cols = reports_data_all.columns.tolist()
    cols.remove('Form Instance ID')
    cols.remove('assessment_dt')
    cols.remove('Record ID')
    cols = ['Record ID', 'assessment_dt'] + cols
    reports_data_final = reports_data_all.reindex(columns=cols)

    ### STEP 2B: collect data from COMPLICATIONS reports
    # PLEASE NOTE THAT THIS WORKS, but as of 31/3 no complications data is
    # present; hence this option is disabled.
    # if you enable it, make sure to add two outputs as well.
    # complications_struct = study_structure \
    #     .filter(['Form Type', 'Form Collection Name',
    #              'Form Collection Order', 'Form Name', 'Form Order',
    #              'Field Variable Name', 'Field Label', 'Field ID', 'Field Type',
    #              'Field Order', 'Calculation Template',
    #              'Field Option Group'],axis=1) \
    #     .sort_values(['Form Order','Form Collection Name','Form Collection Order','Field Order'])
    # complications_struct = complications_struct[complications_struct['Form Type'].isin(['Report'])]
    # complications_struct = complications_struct[(~complications_struct['Field Variable Name'].isna())]
    # complications_struct = complications_struct[(complications_struct['Form Collection Name'].isin(['Complications']))]
    # # TODO: get actual complications
    # # get raw data without deleted and test data, ignore junk form instances
    # complications_data = study_data[study_data['Form Type'].isin(['Complications'])]
    # complications_data_filtered = complications_data[(~complications_data['Form Instance ID'].isna())]
    # # problem: daily reports are dynamic, changing over time. As are their ID's. On top of that people can rename the form.
    # # solution: look for all reports that start with 'Daily' and find their Form Instance ID. Then use that to select all reports.
    # complication_form_instance_IDs = complications_data_filtered['Form Instance ID'][complications_data_filtered['Form Instance Name'].str.match('.*Complications.*')].unique()
    # print(complication_form_instance_IDs)
    # complication_true = [s in complication_form_instance_IDs for s in complications_data_filtered['Form Instance ID']]
    # complications_data_filtered = complications_data_filtered[complication_true]
    # complications_data_filtered = complications_data_filtered.filter(['Record ID','Field ID','Form Type','Form Instance ID','Form Instance Name','Value','Date'])

    ## STEP 3: (TODO) summarize data from reports and add the summary stats
    ##         to study_data_final

    ## STEP 4: RETURN THIS DATA
    return (study_data_final, study_structure_filtered, reports_data_final,
            reports_structure_filtered, optiongroups_struct)
# -*- coding: utf-8 -*- """ Created on Fri Apr 3 12:46:07 2020 @author: wouterpotters """ import site site.addsitedir( './../') # add directory to path to enable import of castor_api from castor_api import Castor_api import configparser config = configparser.ConfigParser() config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini')) c = Castor_api(config['CastorCredentials']['local_private_path']) study_id = c.select_study_by_name(config['CastorCredentials']['study_name']) # select AMC + VUmc institutes = c.request_institutes() inst_amc_vumc = [ inst['institute_id'] for inst in c.request_institutes() if (inst['name'] == 'AUMC - AMC' or inst['name'] == 'AUMC - VUmc') ] records = c.request_study_records( institute=inst_amc_vumc[0]) + c.request_study_records( institute=inst_amc_vumc[1]) ct_perf_values = c.field_values_by_variable_name('CT_thorax_performed', records=records) corad_values = c.field_values_by_variable_name('corads_admission',
@author: wouterpotters """ import site, pandas as pd site.addsitedir( './../') # add directory to path to enable import of castor_api from castor_api import Castor_api from datetime import datetime, timedelta import configparser config = configparser.ConfigParser() config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini')) c = Castor_api(config['CastorCredentials']['local_private_path']) study_id = c.select_study_by_name(config['CastorCredentials']['study_name']) # select AMC + VUmc institutes = c.request_institutes() inst_amc_vumc = [ inst['institute_id'] for inst in c.request_institutes() if inst['name'] == 'AUMC - AMC' ] records = c.request_study_records(institute=inst_amc_vumc[0]) # % records = [ r for r in records if r['progress'] < 95 and r['progress'] > 5 and ( pd.to_datetime(r['created_on']['date']) < datetime.today() - timedelta(days=3))