Ejemplo n.º 1
0
def import_data_by_record(path_to_api_creds=None):
    """Load study and report data from Castor, record by record.

    Alternative for ``import_data`` if that fails due to server-side timeout
    errors (i.e. for large datasets): this alternative loops over the records
    and report instances to load the data.

    Records of the test institute (the first institute whose name contains
    'test', if there is one) and archived (deleted) records are dropped from
    the study and the report data.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from Castor are saved by
            the user, e.g. in a user dir outside of the git repo. When None,
            ``local_private_path`` from ``user_settings.ini`` (next to this
            module) is used.
            See also: https://helpdesk.castoredc.com/article/124-application-programming-interface-api

    Returns:
        The tuple ``(df_study, df_structure_study, df_report,
        df_structure_report, df_optiongroups_structure)`` obtained from
        ``records_reports_all(report_names=['Daily'])``, with the test and
        archived records removed from ``df_study`` and ``df_report``.
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'user_settings.ini'))

    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    c = Castor_api(path_to_api_creds)

    # select the study that is named in the settings file
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    (df_study, df_structure_study, df_report, df_structure_report,
     df_optiongroups_structure) = c.records_reports_all(report_names=['Daily'])

    # collect the IDs of test-institute and archived (deleted) records
    excluded_records = []
    test_inst = next(
        (i for i in c.request_institutes() if 'test' in i['name'].lower()),
        None)
    if test_inst is not None:
        # a study without a test institute simply has no test records to drop
        excluded_records += [
            r['record_id']
            for r in c.request_study_records(
                institute=test_inst['institute_id'])
        ]
    excluded_records += [
        r['record_id'] for r in c.request_study_records()
        if r['archived'] == 1
    ]

    for df in (df_study, df_report):
        df.drop(index=df[df['Record Id'].isin(excluded_records)].index,
                inplace=True)

    return (df_study, df_structure_study, df_report, df_structure_report,
            df_optiongroups_structure)
Ejemplo n.º 2
0
def get_units(cols_input):
    """Look up the unit name that belongs to each of the given columns.

    The Castor API is queried for the option groups and the study structure.
    For every column that is listed in ``numeric_vars`` (from
    ``get_unit_lookup_dict``), the unit option whose conversion factor is 1.0
    is used and its 'Option Name' is stored as the unit of that column.

    Args:
        cols_input: column names; anything ``pd.Series`` accepts.

    Returns:
        A list with one entry per input column: the unit name, or '' when no
        unit was found for that column.
    """
    # settings file: create this once and never upload it
    settings = configparser.ConfigParser()
    settings.read('../user_settings.ini')
    castor_settings = settings['CastorCredentials']

    # connect to castor api to fetch information on variable lists
    api = Castor_api(castor_settings['local_private_path'])
    api.select_study_by_name(castor_settings['study_name'])
    optiongroups = api.request_study_export_optiongroups()
    studystruct = api.request_study_export_structure()

    cols = pd.Series(cols_input)
    units = pd.Series(cols_input)
    units[:] = ''
    lookup_dict, numeric_vars = get_unit_lookup_dict()

    for col_name in cols.to_list():
        if col_name not in numeric_vars:
            continue
        unit_field = numeric_vars[col_name]
        for option_value, factor in lookup_dict[unit_field].items():
            # only the option with 1.0 as conversion factor is used
            if factor != 1.0:
                continue
            # option group that is attached to the unit field
            field_mask = studystruct['Field Variable Name'] == unit_field
            option_group_id = studystruct['Field Option Group'][field_mask]
            group_mask = (optiongroups['Option Group Id']
                          == option_group_id.values[0])
            options = optiongroups[['Option Name', 'Option Value']][group_mask]
            # name of the option whose value equals the lookup key
            value_mask = (options['Option Value'].values.astype(int)
                          == option_value)
            unit = options['Option Name'][value_mask]
            units[cols == col_name] = unit.values[0]
    return units.to_list()
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
check_institute_status.py summarizes the data of a center

Created on Thu Mar 26 21:51:39 2020

@author: wouterpotters
"""
import time, statistics
from castor_api import Castor_api
# folder with the Castor credentials, e.g. in user dir outside of GIT repo
c = Castor_api('/Users/wouterpotters/Desktop/')

# get study ID for COVID study
study_id = c.request_study_id('COVID')[0]

# center to summarize; the alternatives are 'AUMC - VUmc' and 'AUMC - AMC'
center = 'MUMC'

records = c.request_study_records(study_id)

# non-archived records that belong to the selected center
center_records = [
    x for x in records
    if x['_embedded']['institute']['name'] == center and x['archived'] == False
]
count = len(center_records)
completion_rate = [x['progress'] for x in center_records]
completion_rate_100 = sum([
Ejemplo n.º 4
0
"""
update_slack_AMC.py creates an update every 10 minutes (if data has been added)

Created on Thu Mar 26 21:51:39 2020

@author: wouterpotters
"""
import time, statistics, os, site, sys
site.addsitedir(
    './../')  # add directory to path to enable import of castor_api
from castor_api import Castor_api

# folder that holds the secret file, the client file and the tokens_slack file
location_castor_slack_api_data = '/Users/wouterpotters/Desktop/'

# e.g. in user dir outside of GIT repo
c = Castor_api(location_castor_slack_api_data)

# get study ID for COVID study (first entry of the result)
study_id = c.request_study_id('COVID')[0]


# Posting to a Slack channel
def send_message_to_slack(text):
    from urllib import request
    import json
    post = {"text": "{0}".format(text)}

    try:
        json_data = json.dumps(post)

        # the tokens_slack file should contain the full URL with the token to submit data to slack
Ejemplo n.º 5
0
config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini'))

# the excel file with all variables and answer options is stored here
target_excel = config['exportresults']['excel_file_variables']

# folder with all figures
figure_dir = config['exportresults']['figures_folder']

# Get all data from the Castor database (without any selection criterion).
# Note that you need export rights for every individual center.
# This branch is disabled; switch the condition to True to run the export.
if False:
    creds_dir = config['CastorCredentials']['local_private_path']

    (study_data, study_struct, reports_data, reports_struct,
     optiongroups_struct) = covid19_import.import_data_by_record(creds_dir)

    # get progression for each record
    c = Castor_api(creds_dir)
    c.select_study_by_name(config['CastorCredentials']['study_name'])
    records = pd.DataFrame(c.request_study_records())
    progress = records['progress']
    # NOTE(review): this assignment aligns on the index of study_data;
    # confirm that it matches the index of `records`.
    study_data['progress'] = progress
    study_data_orig = study_data

    # store all objects in one pickle file inside the private folder
    pickle_path = str(os.path.join(creds_dir, 'objs.pkl'))
    with open(pickle_path, 'wb') as f:
        pickle.dump([
            study_data_orig,
            study_struct,
            reports_data,
            reports_struct,
            optiongroups_struct,
        ], f)
# -*- coding: utf-8 -*-
"""
Created on Fri Apr  3 12:46:07 2020

@author: wouterpotters
"""
import site
site.addsitedir(
    './../')  # add directory to path to enable import of castor_api
from castor_api import Castor_api

import configparser
import os  # os.path is used below but is not imported in the visible part of this script

config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini'))

c = Castor_api(config['CastorCredentials']['local_private_path'])
study_id = c.select_study_by_name(config['CastorCredentials']['study_name'])
varname = 'Outcome'

# select AMC + VUmc + MUMC
target_institutes = ('AUMC - AMC', 'AUMC - VUmc', 'MUMC')
institutes = c.request_institutes()
inst_amc_vumc = [
    inst['institute_id'] for inst in institutes
    if inst['name'] in target_institutes
]

# Collect the records of every selected institute. The previous version only
# queried the first two IDs, which silently dropped the third institute and
# raised an IndexError when fewer than two institutes matched.
records = [
    record
    for institute_id in inst_amc_vumc
    for record in c.request_study_records(institute=institute_id)
]

options = c.request_fieldoptiongroup(
Ejemplo n.º 7
0
# -*- coding: utf-8 -*-
"""
check_institute_status.py summarizes the data of a center

Created on Thu Mar 26 21:51:39 2020

@author: wouterpotters
"""
import time, statistics
from castor_api import Castor_api

import configparser
config = configparser.ConfigParser()
config.read('../user_settings.ini')

# folder with the Castor credentials, e.g. in user dir outside of GIT repo
c = Castor_api(config['CastorCredentials']['local_private_path'])

# select the study named in the settings file; the result is kept as study_id
study_id = c.select_study_by_name(config['CastorCredentials']['study_name'])

# center to summarize; the alternatives are 'AUMC - VUmc' and 'AUMC - AMC'
center = 'MUMC'

records = c.request_study_records()

# number of non-archived records that belong to the selected center
count = sum(
    1 for x in records
    if x['_embedded']['institute']['name'] == center and x['archived'] == False
)
completion_rate = [
    x['progress'] for x in records
Ejemplo n.º 8
0
def import_study_report_structure(path_to_api_creds=None):
    """Fetch the filtered study and report structure plus the option groups.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from Castor are saved by
            the user, e.g. in a user dir outside of the git repo. When None,
            ``local_private_path`` from ``user_settings.ini`` (next to this
            module) is used.
            See also: https://helpdesk.castoredc.com/article/124-application-programming-interface-api

    Returns:
        The tuple ``(study_structure_filtered, reports_structure_filtered,
        optiongroups_struct)``:

        * study_structure_filtered: structure rows of 'Study' forms,
        * reports_structure_filtered: structure rows of 'Report' forms in the
          'Daily case record form' collection,
        * optiongroups_struct: the answer option groups as exported by Castor.

        Both structure tables only keep rows with a usable field type and a
        non-missing 'Field Variable Name'.
    """
    # columns of the structure export that are kept (unused ones are dropped)
    structure_columns = ['Form Type', 'Form Collection Name',
                         'Form Collection Order', 'Form Name', 'Form Order',
                         'Field Variable Name', 'Field Label', 'Field ID',
                         'Field Type', 'Field Order', 'Calculation Template',
                         'Field Option Group']
    # sort on form collection order and field order
    # (this matches how data is filled)
    sort_order = ['Form Order', 'Form Collection Name',
                  'Form Collection Order', 'Field Order']
    # other datatypes are (most of the times) unusable for the ML model;
    # i.e. custom entries
    usable_field_types = ['radio', 'date', 'dropdown', 'checkbox', 'string',
                          'numeric', 'calculation', 'time']

    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'user_settings.ini'))

    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    c = Castor_api(path_to_api_creds)

    # select the study that is named in the settings file
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    ### STEP 0: collect answer options from optiongroups
    optiongroups_struct = c.request_study_export_optiongroups()

    ### STEP 1: collect the main study structure (i.e. questions)
    study_structure = c.request_study_export_structure()

    # Both outputs start from the same column selection and ordering, so it
    # is computed once.
    structure_sorted = study_structure \
        .filter(structure_columns, axis=1) \
        .sort_values(sort_order)

    has_usable_type = structure_sorted['Field Type'].isin(usable_field_types)
    # variables without a Field Variable Name are dropped
    # (additional remarks by user?)
    has_variable_name = ~structure_sorted['Field Variable Name'].isna()

    # keep only study forms; reports can exist multiple times and should be
    # summarized.
    study_structure_filtered = structure_sorted[
        has_usable_type
        & structure_sorted['Form Type'].isin(['Study'])
        & has_variable_name]

    # report variables: only the daily case record form
    reports_structure_filtered = structure_sorted[
        has_usable_type
        & structure_sorted['Form Type'].isin(['Report'])
        & has_variable_name
        & structure_sorted['Form Collection Name'].isin(
            ['Daily case record form'])]

    return (study_structure_filtered, reports_structure_filtered,
            optiongroups_struct)
Ejemplo n.º 9
0
def import_data(path_to_api_creds=None):
    """Export study data and daily-report data of the configured Castor study.

    Archived (=deleted) records and records of the test patient (record IDs
    matching '000001') are excluded.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from Castor are saved by
            the user, e.g. in a user dir outside of the git repo. When None,
            ``local_private_path`` from ``user_settings.ini`` is used.
            See also: https://helpdesk.castoredc.com/article/124-application-programming-interface-api

    Returns:
        The tuple ``(study_data_final, study_structure_filtered,
        reports_data_final, reports_structure_filtered,
        optiongroups_struct)``:

        * study_data_final: one row per record, one column per study
          variable; Record ID is the named index.
        * study_structure_filtered: structure rows of the study variables.
        * reports_data_final: one row per daily report with 'Record ID' and
          'assessment_dt' as first columns; Record ID is not the index,
          because multiple entries per record exist.
        * reports_structure_filtered: structure rows of the daily report
          variables.
        * optiongroups_struct: the answer option groups as exported by Castor.

    Raises:
        ValueError: if the merged report data has no 'assessment_dt',
            'Record ID' or 'Form Instance ID' column.
    """
    # columns of the structure export that are kept (unused ones are dropped)
    structure_columns = ['Form Type', 'Form Collection Name',
                         'Form Collection Order', 'Form Name', 'Form Order',
                         'Field Variable Name', 'Field Label', 'Field ID',
                         'Field Type', 'Field Order', 'Calculation Template',
                         'Field Option Group']
    # sort on form collection order and field order
    # (this matches how data is filled)
    sort_order = ['Form Order', 'Form Collection Name',
                  'Form Collection Order', 'Field Order']
    # other datatypes are (most of the times) unusable for the ML model;
    # i.e. custom entries
    usable_field_types = ['radio', 'date', 'dropdown', 'checkbox', 'string',
                          'numeric', 'calculation', 'time']

    ### STEP 0: connect to API
    config = configparser.ConfigParser()
    # create this file once using covid19_createconfig and never upload it to
    # git. NOTE: the path is resolved relative to the current working
    # directory.
    config.read('user_settings.ini')

    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    c = Castor_api(path_to_api_creds)

    # select the study that is named in the settings file
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    ### STEP 0: collect answer options from optiongroups
    optiongroups_struct = c.request_study_export_optiongroups()

    ### STEP 1: collect data from study

    # get the main study structure (i.e. questions)
    study_structure = c.request_study_export_structure()

    # The study and the report structure start from the same column selection
    # and ordering, so it is computed once.
    structure_sorted = study_structure \
        .filter(structure_columns, axis=1) \
        .sort_values(sort_order)

    has_usable_type = structure_sorted['Field Type'].isin(usable_field_types)
    # variables without a Field Variable Name are dropped
    # (additional remarks by user?)
    has_variable_name = ~structure_sorted['Field Variable Name'].isna()

    # keep only study forms; reports can exist multiple times and should be
    # summarized.
    study_structure_filtered = structure_sorted[
        has_usable_type
        & structure_sorted['Form Type'].isin(['Study'])
        & has_variable_name]

    # Get study data
    study_data = c.request_study_export_data()
    study_data['Record ID'] = study_data['Record ID'].astype(str)

    # archived data (=DELETED data) and the test patient are excluded
    is_real_record = (~study_data['Record ID'].str.match('^ARCHIVED-.*')) \
        & (~study_data['Record ID'].str.match('000001'))

    # Drop data that is not a study entry (i.e. reports, complications):
    # repeated measures should be summarized first.
    study_data_filtered = study_data[
        study_data['Form Type'].isin(['Study']) & is_real_record] \
        .filter(['Record ID', 'Field ID', 'Form Type', 'Value', 'Date'],
                axis=1)

    # combine study data (patients and values) and study structure (variables)
    study_data_final = pandas.merge(
        study_structure_filtered[['Field Variable Name', 'Field ID']],
        study_data_filtered[['Record ID', 'Value', 'Field ID']],
        on='Field ID') \
        .pivot(index='Record ID', columns='Field Variable Name',
               values='Value')

    ### STEP 2A: collect data from DAILY reports

    # get raw data without deleted and test data, ignore junk form instances
    reports_data_filtered = study_data[
        study_data['Form Type'].isin(['Report']) & is_real_record]
    reports_data_filtered = reports_data_filtered[
        ~reports_data_filtered['Form Instance ID'].isna()]

    # problem: daily reports are dynamic, changing over time. As are their
    # ID's. On top of that people can rename the form.
    # solution: look for all reports that start with 'Daily' and find their
    # Form Instance ID. Then use that to select all reports.
    daily_report_form_instance_IDs = reports_data_filtered['Form Instance ID'][
        reports_data_filtered['Form Instance Name'].str.match('^Daily .*')
    ].unique()
    # isin() replaces a Python-level membership list; it also keeps the
    # columns intact when there are no report rows at all.
    reports_data_filtered = reports_data_filtered[
        reports_data_filtered['Form Instance ID'].isin(
            daily_report_form_instance_IDs)]
    reports_data_filtered = reports_data_filtered.filter(
        ['Record ID', 'Field ID', 'Form Type', 'Form Instance ID',
         'Form Instance Name', 'Value', 'Date'])

    # report variables: only the daily case record form
    reports_structure_filtered = structure_sorted[
        has_usable_type
        & structure_sorted['Form Type'].isin(['Report'])
        & has_variable_name
        & structure_sorted['Form Collection Name'].isin(
            ['Daily case record form'])]

    # merge the structure and the data to get full dataset
    reports_data_all = pandas.merge(
        reports_structure_filtered[['Field Variable Name', 'Field ID']],
        reports_data_filtered[['Record ID', 'Value', 'Form Instance ID',
                               'Field ID']],
        on='Field ID') \
        .pivot(index='Form Instance ID', columns='Field Variable Name',
               values='Value')

    # Record ID has vanished; now add Record ID again. (probably smarter to do
    # this using pivot_table, but cant figure this out)
    reports_data_all = pandas.merge(
        reports_data_all,
        reports_data_filtered[['Record ID', 'Form Instance ID']],
        on='Form Instance ID') \
        .drop_duplicates()

    # reorganize data to put record id and assessment date in front.
    cols = reports_data_all.columns.tolist()
    cols.insert(0, cols.pop(cols.index('assessment_dt')))  # admission date ICU according to report
    cols.insert(0, cols.pop(cols.index('Record ID')))
    cols.remove('Form Instance ID')  # drop this one, not needed
    reports_data_final = reports_data_all.reindex(columns=cols)

    ### STEP 2B: collect data from COMPLICATIONS reports
    # PLEASE NOTE THAT THIS WORKS, but as of 31/3 no complications data is present; hence this option is disabled.
    # if you enable it, make sure to add two outputs as well.
    # complications_struct = study_structure \
    # .filter(['Form Type', 'Form Collection Name',
    #        'Form Collection Order', 'Form Name', 'Form Order',
    #        'Field Variable Name', 'Field Label', 'Field ID', 'Field Type',
    #        'Field Order', 'Calculation Template',
    #        'Field Option Group'],axis=1) \
    # .sort_values(['Form Order','Form Collection Name','Form Collection Order','Field Order'])
    # complications_struct = complications_struct[complications_struct['Form Type'].isin(['Report'])]
    # complications_struct = complications_struct[(~complications_struct['Field Variable Name'].isna())]
    # complications_struct = complications_struct[(complications_struct['Form Collection Name'].isin(['Complications']))]

    # # TODO: get actual complications
    # # get raw data without deleted and test data, ignore junk form instances
    # complications_data = study_data[study_data['Form Type'].isin(['Complications'])]
    # complications_data_filtered = complications_data[(~complications_data['Form Instance ID'].isna())]

    # # problem: daily reports are dynamic, changing over time. As are their ID's. On top of that people can rename the form.
    # # solution: look for all reports that start with 'Daily' and find their Form Instance ID. Then use that to select all reports.
    # complication_form_instance_IDs = complications_data_filtered['Form Instance ID'][complications_data_filtered['Form Instance Name'].str.match('.*Complications.*')].unique()
    # print(complication_form_instance_IDs)
    # complication_true = [s in complication_form_instance_IDs for s in complications_data_filtered['Form Instance ID']]
    # complications_data_filtered = complications_data_filtered[complication_true]
    # complications_data_filtered = complications_data_filtered.filter(['Record ID','Field ID','Form Type','Form Instance ID','Form Instance Name','Value','Date'])

    ## STEP 3: (TODO) summarize data from reports and add the summary stats to
    ## study_data_final

    ## STEP 4: RETURN THIS DATA
    return (study_data_final, study_structure_filtered, reports_data_final,
            reports_structure_filtered, optiongroups_struct)
Ejemplo n.º 10
0
# -*- coding: utf-8 -*-
"""
Created on Fri Apr  3 12:46:07 2020

@author: wouterpotters
"""
import site
site.addsitedir(
    './../')  # add directory to path to enable import of castor_api
from castor_api import Castor_api

import configparser
import os  # os.path is used below but is not imported in the visible part of this script

config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini'))

c = Castor_api(config['CastorCredentials']['local_private_path'])
study_id = c.select_study_by_name(config['CastorCredentials']['study_name'])

# select AMC + VUmc
target_institutes = ('AUMC - AMC', 'AUMC - VUmc')
institutes = c.request_institutes()
inst_amc_vumc = [
    inst['institute_id'] for inst in institutes
    if inst['name'] in target_institutes
]

# Collect the records of every selected institute; this no longer raises an
# IndexError when one of the two institutes is missing.
records = [
    record
    for institute_id in inst_amc_vumc
    for record in c.request_study_records(institute=institute_id)
]

ct_perf_values = c.field_values_by_variable_name('CT_thorax_performed',
                                                 records=records)
corad_values = c.field_values_by_variable_name('corads_admission',
@author: wouterpotters
"""
import site, pandas as pd

site.addsitedir(
    './../')  # add directory to path to enable import of castor_api
from castor_api import Castor_api
from datetime import datetime, timedelta

import configparser

import os  # os.path is used below but is not imported in the visible part of this script

config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini'))

c = Castor_api(config['CastorCredentials']['local_private_path'])
study_id = c.select_study_by_name(config['CastorCredentials']['study_name'])

# select AMC only (the variable name is a leftover from the AMC + VUmc scripts)
institutes = c.request_institutes()
inst_amc_vumc = [
    inst['institute_id'] for inst in institutes
    if inst['name'] == 'AUMC - AMC'
]
records = c.request_study_records(institute=inst_amc_vumc[0])

# %
records = [
    r for r in records if r['progress'] < 95 and r['progress'] > 5 and (
        pd.to_datetime(r['created_on']['date']) < datetime.today() -
        timedelta(days=3))