Exemple #1
0
def get_units(cols_input):
    """Return the unit name that belongs to each of the given columns.

    The Castor API is queried (credentials path and study name are read from
    '../user_settings.ini') for the option groups and the study structure.
    For every column that is a key of the `numeric_vars` mapping returned by
    `get_unit_lookup_dict()`, the unit option with a conversion factor of 1.0
    is looked up and its 'Option Name' is stored as the unit.

    Args:
        cols_input: column (variable) names, in any form `pd.Series` accepts.

    Returns:
        A list with one entry per input column: the unit name, or '' when
        no unit was found for that column.
    """
    # settings file is created once locally and must never be uploaded
    settings = configparser.ConfigParser()
    settings.read('../user_settings.ini')
    credentials = settings['CastorCredentials']

    # connect to the castor api to fetch information on the variable lists
    api = ca.CastorApi(credentials['local_private_path'])
    api.select_study_by_name(credentials['study_name'])
    optiongroups = api.request_study_export_optiongroups()
    studystruct = api.request_study_export_structure()

    cols = pd.Series(cols_input)
    units = pd.Series(cols_input)
    units[:] = ''
    lookup_dict, numeric_vars = get_unit_lookup_dict()
    for variable in cols.to_list():
        if variable not in numeric_vars:
            continue
        unit_field = numeric_vars[variable]
        for ind, conversion in lookup_dict[unit_field].items():
            # only the option with 1.0 as conversion factor is used
            if conversion != 1.0:
                continue
            group_column = studystruct['Field Option Group']
            option_group_id = group_column[
                studystruct['Field Variable Name'] == unit_field]
            named_options = optiongroups[['Option Name', 'Option Value']]
            options = named_options[
                optiongroups['Option Group Id'] == option_group_id.values[0]]
            option_names = options['Option Name']
            unit = option_names[
                options['Option Value'].values.astype(int) == ind]
            units[cols == variable] = unit.values[0]
    return units.to_list()
 def setUp(self):
     """Create the Castor API client that is stored on `self.c`.

     The client id and secret are read from the 'castor_clientid' and
     'castor_secret' environment variables. Per the original author these
     are dummy credentials for a fake study with a fake account, stored in
     github secrets.
     """
     credentials = {
         'client_id': os.getenv('castor_clientid'),
         'client_secret': os.getenv('castor_secret'),
     }
     self.c = ca.CastorApi(**credentials)
Exemple #3
0
def import_study_report_structure(path_to_api_creds=None,
                                  dailyreportsonly=True):
    """Download the Castor study structure, split in study and report fields.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from castor are saved by
            the user. When None, the 'local_private_path' entry of the
            [CastorCredentials] section of user_settings.ini (located next
            to this module) is used. See also:
            https://helpdesk.castoredc.com/article/124-application-programming-interface-api
        dailyreportsonly: when True (default), only report fields whose
            'Form Collection Name' is 'Daily case record form' are kept.

    Returns:
        Tuple (study_structure, reports_structure, optiongroups_struct):
        the rows of the exported structure with 'Form Type' 'Study', the
        rows with 'Form Type' 'Report' (may contain duplicates), and the
        exported option groups as returned by the API.
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'user_settings.ini'))

    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    c = ca.CastorApi(path_to_api_creds)  # e.g. in user dir outside of GIT repo

    # select the study that is named in the settings file
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    # STEP 0: collect answer options from optiongroups
    optiongroups_struct = c.request_study_export_optiongroups()

    # STEP 1: collect the main study structure (i.e. questions)
    structure = c.request_study_export_structure()

    # sort on form collection order and field order
    # (this matches how data is filled in castor)
    structure_filtered = structure \
        .sort_values(['Form Collection Name', 'Form Collection Order',
                      'Form Order', 'Field Order'])

    # drop fields that have no Field Variable Name; these fields do not
    # record data. The result has to be assigned: boolean indexing returns
    # a new frame and leaves the original untouched.
    structure_filtered = structure_filtered[
        structure_filtered['Field Variable Name'].notna()]

    # select only study variables
    study_structure = structure_filtered[structure_filtered['Form Type'].isin(
        ['Study'])]

    # select only report variables (may contain duplicates)
    reports_structure = structure_filtered[
        structure_filtered['Form Type'].isin(['Report'])]

    if dailyreportsonly:
        reports_structure = reports_structure[
            reports_structure['Form Collection Name'].isin(
                ['Daily case record form'])]

    return study_structure, reports_structure, optiongroups_struct
Exemple #4
0
def import_data_by_record(path_to_api_creds=None):
    """Load study and daily-report data by looping over the records.

    Alternative for import_data if import_data fails due to server-side
    timeout errors (i.e. for large datasets); this alternative loops over
    the records and report instances to load the data.

    Records of the test institute (the first institute whose name contains
    'test', case-insensitive) and archived (deleted) records are removed
    from the study and report data. When no test institute exists, only the
    archived records are removed.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from castor are saved by
            the user. When None, the 'local_private_path' entry of the
            [CastorCredentials] section of user_settings.ini (located next
            to this module) is used. See also:
            https://helpdesk.castoredc.com/article/124-application-programming-interface-api

    Returns:
        Tuple (df_study, df_structure_study, df_report, df_structure_report,
        df_optiongroups_structure) as returned by `records_reports_all`,
        with the excluded records dropped from df_study and df_report.
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'user_settings.ini'))

    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    c = ca.CastorApi(path_to_api_creds)  # e.g. in user dir outside of GIT repo

    # select the study that is named in the settings file
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    df_study, df_structure_study, df_report, df_structure_report,\
        df_optiongroups_structure = \
        c.records_reports_all(report_names=['Daily'],
                              add_including_center=True)

    # collect records of the test institute and archived (deleted) records
    excluded_records = []
    # a study without a test institute must not crash here, hence the
    # default of None instead of indexing the first match
    test_inst = next(
        (i for i in c.request_institutes() if 'test' in i['name'].lower()),
        None)
    if test_inst is not None:
        excluded_records += [
            r['record_id']
            for r in c.request_study_records(
                institute=test_inst['institute_id'])
        ]
    excluded_records += [
        r['record_id'] for r in c.request_study_records() if r['archived'] == 1
    ]

    df_study.drop(
        index=df_study[df_study['Record Id'].isin(excluded_records)].index,
        inplace=True)
    df_report.drop(
        index=df_report[df_report['Record Id'].isin(excluded_records)].index,
        inplace=True)

    return df_study, df_structure_study, df_report, \
        df_structure_report, df_optiongroups_structure
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Apr  3 12:46:07 2020

@author: wouterpotters
"""
import os
import castorapi as ca
import configparser

# Load the local settings: path to the Castor credentials and the study name.
config = configparser.ConfigParser()
config.read(os.path.join(os.path.dirname(__file__), '../user_settings.ini'))

c = ca.CastorApi(config['CastorCredentials']['local_private_path'])
study_id = c.select_study_by_name(config['CastorCredentials']['study_name'])
varname = 'Outcome'

# select AMC + VUmc + MUMC
# The institute list is requested once and reused for the selection.
institutes = c.request_institutes()
inst_amc_vumc = [inst['institute_id'] for inst in institutes
                 if inst['name'] in ('AUMC - AMC', 'AUMC - VUmc', 'MUMC')]
# NOTE(review): only the first two selected institutes are used here, even
# though MUMC is part of the selection above - confirm whether the records
# of a third institute should be added as well.
records = c.request_study_records(institute=inst_amc_vumc[0]) +\
    c.request_study_records(institute=inst_amc_vumc[1])

# answer options and the recorded values of the selected variable
options = c.request_fieldoptiongroup(
    optiongroup_id=c.field_optiongroup_by_variable_name(varname))
values = c.field_values_by_variable_name(varname, records=records)
Exemple #6
0
def import_study_report_structure(path_to_api_creds=None):
    """Download the filtered structure of the study and daily report forms.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from castor are saved by
            the user. When None, the 'local_private_path' entry of the
            [CastorCredentials] section of user_settings.ini (located next
            to this module) is used. See also:
            https://helpdesk.castoredc.com/article/124-application-programming-interface-api

    Returns:
        Tuple (study_structure_filtered, reports_structure_filtered,
        optiongroups_struct). Both structures hold only the selected
        columns, the selected field types and fields with a
        'Field Variable Name'; the first is limited to 'Study' forms, the
        second to 'Report' forms of the 'Daily case record form'
        collection. The option groups are returned as exported by the API.
    """
    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'user_settings.ini'))

    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    api = ca.CastorApi(path_to_api_creds)  # e.g. user dir outside GIT repo

    # select the study that is named in the settings file
    api.select_study_by_name(config['CastorCredentials']['study_name'])

    # STEP 0: answer option groups
    optiongroups_struct = api.request_study_export_optiongroups()

    # STEP 1: main study structure (i.e. questions)
    full_structure = api.request_study_export_structure()

    kept_columns = ['Form Type', 'Form Collection Name',
                    'Form Collection Order', 'Form Name', 'Form Order',
                    'Field Variable Name', 'Field Label', 'Field ID',
                    'Field Type', 'Field Order', 'Calculation Template',
                    'Field Option Group']
    # sorting on form and field order matches how data is filled in castor
    sort_keys = ['Form Order', 'Form Collection Name',
                 'Form Collection Order', 'Field Order']
    # other datatypes are (most of the times) unusable for a ML model
    usable_field_types = ['radio', 'date', 'dropdown', 'checkbox', 'string',
                          'numeric', 'calculation', 'time']

    # the study and report selections start from the same reduced, sorted
    # table, so it is built once
    structure = full_structure \
        .filter(kept_columns, axis=1) \
        .sort_values(sort_keys)

    has_usable_type = structure['Field Type'].isin(usable_field_types)
    # fields without a Field Variable Name are dropped (remarks by user?)
    has_variable_name = ~(structure['Field Variable Name'].isna())

    # keep only study forms; reports can exist multiple times
    study_structure_filtered = structure[
        has_usable_type
        & structure['Form Type'].isin(['Study'])
        & has_variable_name]

    # keep only the report forms of the daily case record form
    reports_structure_filtered = structure[
        has_usable_type
        & structure['Form Type'].isin(['Report'])
        & has_variable_name
        & structure['Form Collection Name'].isin(['Daily case record form'])]

    return study_structure_filtered, reports_structure_filtered, \
        optiongroups_struct
Exemple #7
0
def import_data(path_to_api_creds=None):
    """Download the study data and the daily report data from Castor.

    The settings are read from 'user_settings.ini', resolved relative to
    the current working directory. The exported study structure and study
    data are filtered and combined into one table for the study forms and
    one table for the daily reports.

    Args:
        path_to_api_creds: private folder where the client & secret files
            (no extension, 1 string only per file) from castor are saved by
            the user. When None, the 'local_private_path' entry of the
            [CastorCredentials] section of the settings file is used.

    Returns:
        Tuple of five objects:
        - study_data_final: study values pivoted to one row per 'Record ID'
          (the named index) and one column per 'Field Variable Name'.
        - study_structure_filtered: structure rows of the 'Study' forms,
          limited to the selected columns and field types.
        - reports_data_final: daily report values pivoted per
          'Form Instance ID', with 'Record ID' and 'assessment_dt' as the
          first columns and 'Form Instance ID' removed.
        - reports_structure_filtered: structure rows of the 'Report' forms
          in the 'Daily case record form' collection.
        - optiongroups_struct: the option groups as exported by the API.

    Raises:
        ValueError: if the combined report table has no 'assessment_dt',
            'Record ID' or 'Form Instance ID' column (raised by
            `list.index` while reordering the columns).
    """
    # STEP 0: connect to API
    # create this config once using covid19_createconfig
    # and never upload this file to git.
    config = configparser.ConfigParser()
    config.read('user_settings.ini')

    if path_to_api_creds is None:
        path_to_api_creds = config['CastorCredentials']['local_private_path']

    # input: private folder where client & secret files (no extension,
    # 1 string only per file) from castor are saved by the user
    # see also:
    # https://helpdesk.castoredc.com/article/124-application-programming-interface-api
    c = ca.CastorApi(path_to_api_creds)  # e.g. in user dir outside of GIT repo

    # select the study that is named in the settings file
    c.select_study_by_name(config['CastorCredentials']['study_name'])

    # STEP 0: collect answer options from optiongroups
    # get answer option groups
    optiongroups_struct = c.request_study_export_optiongroups()

    # STEP 1: collect data from study
    # get the main study structure (i.e. questions)
    study_structure = c.request_study_export_structure()

    # filter unused columns
    # sort on form collection order and field order
    # (this matches how data is filled in castor)
    study_structure_filtered = study_structure \
        .filter(['Form Type', 'Form Collection Name',
                 'Form Collection Order', 'Form Name', 'Form Order',
                 'Field Variable Name', 'Field Label', 'Field ID',
                 'Field Type', 'Field Order', 'Calculation Template',
                 'Field Option Group'], axis=1) \
        .sort_values(['Form Order', 'Form Collection Name',
                      'Form Collection Order', 'Field Order'])

    # filter datatypes that are (most of the times) unusable for ML model.
    # filter variables that are repeated measurements (i.e. reports data).
    # filter variables that have no Field Variable name (additional remarks?)
    # keep only study forms; reports can exist multiple times.
    study_structure_filtered = study_structure_filtered[
        study_structure_filtered['Field Type'].isin([
            'radio', 'date', 'dropdown', 'checkbox', 'string', 'numeric',
            'calculation', 'time'
        ])
        & study_structure_filtered['Form Type'].isin(['Study'])
        & ~(study_structure_filtered['Field Variable Name'].isna())]

    # Get study data
    study_data = c.request_study_export_data()

    # Filter data that is not a study entry (i.e. reports, complications)
    # - repeated measures; could be summarized first
    # Filter archived data (=DELETED data)
    # NOTE(review): unlike the reports selection below, record '000001'
    # (presumably the test patient) is not removed here - confirm intent.
    study_data['Record ID'] = study_data['Record ID'].astype(str)
    study_data_filtered = study_data[
        study_data['Form Type'].isin(['Study'])
        & (~study_data['Record ID'].str.match('^ARCHIVED-.*'))] \
        .filter(['Record ID', 'Field ID', 'Form Type', 'Value', 'Date'],
                axis=1)

    # combine study data (patients and values) and study structure (variables)
    study_data_final = pandas.merge(
        study_structure_filtered[['Field Variable Name', 'Field ID']],
        study_data_filtered[['Record ID', 'Value', 'Field ID']],
        on='Field ID') \
        .pivot(index='Record ID',
               columns='Field Variable Name',
               values='Value')

    # STEP 2A: collect data from DAILY reports
    # get raw data without deleted and test data, ignore junk form instances
    reports_data_filtered = study_data[
        study_data['Form Type'].isin(['Report'])
        & (~study_data['Record ID'].str.match('^ARCHIVED-.*'))
        & (~study_data['Record ID'].str.match('000001'))]
    reports_data_filtered = reports_data_filtered[
        ~reports_data_filtered['Form Instance ID'].isna()]

    # problem: daily reports are dynamic, changing over time.
    #          As are their ID's. On top of that people can rename the form.
    # solution: look for all reports that start with 'Daily' and find their
    #           Form Instance ID. Then use that to select all reports.
    daily_report_form_instance_IDs = \
        reports_data_filtered['Form Instance ID'][
            reports_data_filtered['Form Instance Name']
            .str.match('^Daily .*')].unique()
    daily_report_true = [
        s in daily_report_form_instance_IDs
        for s in reports_data_filtered['Form Instance ID']
    ]
    reports_data_filtered = reports_data_filtered[daily_report_true]
    reports_data_filtered = reports_data_filtered.filter([
        'Record ID', 'Field ID', 'Form Type', 'Form Instance ID',
        'Form Instance Name', 'Value', 'Date'
    ])

    # filter relevant columns for reports variables
    # sort on form collection order and field order (this matches Castor)
    reports_structure_filtered = study_structure.filter(
        ['Form Type', 'Form Collection Name',
         'Form Collection Order', 'Form Name', 'Form Order',
         'Field Variable Name', 'Field Label', 'Field ID', 'Field Type',
         'Field Order', 'Calculation Template',
         'Field Option Group'], axis=1) \
        .sort_values(['Form Order', 'Form Collection Name',
                      'Form Collection Order', 'Field Order'])

    # filter datatypes that are (most of the times) unusable for ML model
    # keep only report forms of the 'Daily case record form' collection.
    # filter variables that have no Field Variable name (additional remarks?)
    reports_structure_filtered = reports_structure_filtered[
        reports_structure_filtered['Field Type'].isin([
            'radio', 'date', 'dropdown', 'checkbox', 'string', 'numeric',
            'calculation', 'time'
        ])]
    reports_structure_filtered = reports_structure_filtered[
        reports_structure_filtered['Form Type'].isin(['Report'])]
    reports_structure_filtered = reports_structure_filtered[
        ~reports_structure_filtered['Field Variable Name'].isna()]
    reports_structure_filtered = reports_structure_filtered[
        reports_structure_filtered['Form Collection Name'].isin(
            ['Daily case record form'])]

    # merge the structure and the data to get full dataset
    reports_data_all = pandas.merge(reports_structure_filtered[
        ['Field Variable Name', 'Field ID']],
        reports_data_filtered[['Record ID', 'Value',
                               'Form Instance ID', 'Field ID']],
        on='Field ID')\
        .pivot(index='Form Instance ID',
               columns='Field Variable Name',
               values='Value')

    # Record ID has vanished in the pivot; now add Record ID again.
    # (probably smarter to do this using pivot_table, but doesnt work?)
    reports_data_all = pandas.merge(reports_data_all,
                                    reports_data_filtered[
                                        ['Record ID', 'Form Instance ID']],
                                    on='Form Instance ID')\
                             .drop_duplicates()

    # reorganize data to put record id and assessment date in front.
    cols = reports_data_all.columns.tolist()
    # admission date ICU according to report
    cols.insert(0, cols.pop(cols.index('assessment_dt')))
    cols.insert(0, cols.pop(cols.index('Record ID')))
    cols.pop(cols.index('Form Instance ID'))  # drop this one, not needed
    reports_data_final = reports_data_all.reindex(columns=cols)

    # STEP 2B: collect data from COMPLICATIONS reports
    # PLEASE NOTE THAT THIS WORKS, but as of 31/3 no complications data is
    # present; hence this option is disabled.
    # if you enable it, make sure to add two outputs as well.
    if False:
        complications_struct = study_structure \
            .filter(['Form Type', 'Form Collection Name',
                     'Form Collection Order', 'Form Name', 'Form Order',
                     'Field Variable Name', 'Field Label', 'Field ID',
                     'Field Type', 'Field Order', 'Calculation Template',
                     'Field Option Group'], axis=1) \
            .sort_values(['Form Order', 'Form Collection Name',
                          'Form Collection Order', 'Field Order'])
        complications_struct = complications_struct[
            complications_struct['Form Type'].isin(['Report'])]
        complications_struct = complications_struct[
            ~complications_struct['Field Variable Name'].isna()]
        complications_struct = complications_struct[
            complications_struct['Form Collection Name'].isin(
                ['Complications'])]

        # TODO: get actual complications
        # select the complications entries, ignore junk instances
        complications_data = study_data[study_data['Form Type'].isin(
            ['Complications'])]
        complications_data_filtered = complications_data[
            ~complications_data['Form Instance ID'].isna()]

        # problem: reports are dynamic, changing over time.
        #       As are their ID's. On top of that people can rename the form.
        # solution: look for all reports with 'Complications' in their name
        #      and use their Form Instance ID to select all reports.
        complication_form_instance_IDs = \
            complications_data_filtered['Form Instance ID'][
                complications_data_filtered['Form Instance Name']
                .str.match('.*Complications.*')].unique()
        complication_true = [
            s in complication_form_instance_IDs
            for s in complications_data_filtered['Form Instance ID']
        ]
        complications_data_filtered = \
            complications_data_filtered[complication_true]
        complications_data_filtered = \
            complications_data_filtered.filter(
                ['Record ID', 'Field ID', 'Form Type', 'Form Instance ID',
                 'Form Instance Name', 'Value', 'Date'])

    # STEP 3: CLEANUP
    # drop the local references to the intermediate objects
    del (c, cols, reports_data_filtered, reports_data_all, study_structure)
    del (study_data_filtered, study_data, daily_report_form_instance_IDs,
         daily_report_true)

    # STEP 4: RETURN THIS DATA
    # study data:
    # study_structure_filtered
    # study_data_final  # note that record ID is the named index

    # reports data;
    # reports_structure_filtered
    # reports_data_final  # note that record ID can not be the named
    #                       index, because multiple entries exist.

    # STEP 5: (TODO) summarize data from reports and add the summary stats
    # to study_data_final TODO
    return study_data_final, study_structure_filtered, reports_data_final,\
        reports_structure_filtered,  optiongroups_struct
Exemple #8
0
# step 2: install package castorapi
# like this: https://docs.anaconda.com/anaconda/navigator/tutorials/manage-packages/#installing-a-package
#
# step 1 and 2 combined:
# right click on the green arrow in environment, click run terminal
# run: `conda install -c wouterpotters castorapi` in the terminal

# store the secret and client files as described here:
# https://github.com/wouterpotters/castorapi/blob/master/README.md#usage

# now use the package
import castorapi

# Folder that holds the client and secret files.
# Use FORWARD SLASHES, or \\ for each backward slash.
path_to_client_secret = r'C:/path/to/api_secret_a'

# e.g. a folder in the user directory, outside of the GIT repository
c = castorapi.CastorApi(path_to_client_secret)

# select the Parkinson study;
# change the name to match the study name in castor
c.select_study_by_name('parkinson')

### STEP 0: collect answer options and the study structure

# answer option groups for multiple choice questions
df_answeroptions_struct = c.request_study_export_optiongroups()

# main study structure (i.e. questions)
df_study_structure = c.request_study_export_structure()

# filter unused columns from df_study_structure, sort fields
df_study_structure = df_study_structure \
Exemple #9
0
    return study_structure, reports_structure, optiongroups_struct


if __name__ == "__main__":
    # settings file lives one directory above this module
    config = configparser.ConfigParser()
    config.read(
        os.path.join(os.path.dirname(__file__), '../user_settings.ini'))

    # private folder where the client & secret files (no extension,
    # 1 string only per file) from castor are saved by the user, e.g. in a
    # user dir outside of the GIT repo; see also:
    # https://helpdesk.castoredc.com/article/124-application-programming-interface-api
    path_to_api_creds = config['CastorCredentials']['local_private_path']
    c = ca.CastorApi(path_to_api_creds)

    # hard-coded study switch: the first pair is currently disabled
    name, excel_postfix = (
        ('COVID-19 NL', '')
        if False else
        ('Clinical features of COVID-19 positive patients in VieCuri',
         '_viecurie.xlsx'))

    # get study ID and the study name as known by castor
    study_id = c.select_study_by_name(name)
    study_name = c.request_study(study_id=study_id)['name']

    # # Get all data from Castor database (without any selection criterium)
    # Note that you need export rights for every individual center.
Exemple #10
0
Created on Thu Mar 26 21:51:39 2020

@author: wouterpotters
"""
import time
import statistics
import castorapi as ca
import configparser
# settings are read relative to the current working directory
config = configparser.ConfigParser()
config.read('../user_settings.ini')

# folder that holds the secret, the client and the tokens_slack file
location_castor_slack_api_data = config['SlackAPI']['local_private_path']

# e.g. a folder in the user directory, outside of the GIT repository
c = ca.CastorApi(location_castor_slack_api_data)

# get study ID for COVID study
study_id = c.select_study_by_name('COVID-19 NL')


# Posting to a Slack channel
def send_message_to_slack(text):
    from urllib import request
    import json
    post = {"text": "{0}".format(text)}

    try:
        json_data = json.dumps(post)

        # the tokens_slack file should contain the full URL with the token to submit data to slack