Exemple #1
0
                               inc_unknown_stroke_type=False,
                               inc_sk_patients=False)
# Record IDs selected with db="psom"; the flags below ask getIPSSIDs not to
# include registry-only patients or patients of unknown stroke type
# (NOTE(review): exact selection semantics live in getIPSSIDs -- confirm there).
record_ids_psom = getIPSSIDs(db="psom",
                             inc_registry_only=False,
                             inc_unknown_stroke_type=False)

# Load REDCap project (a PyCap object).
project = redcap.Project(api_url_ipss, api_key_ipss)
# Field name of the project's defining field, as reported by PyCap.
def_field = project.def_field
# High-level project information; only the two flags below are read here.
project_info = exportProjectInfo(api_url_ipss, api_key_ipss)
project_longitudinal = bool(project_info["is_longitudinal"])
project_repeating = bool(project_info["has_repeating_instruments_or_events"])
# Events defined in the project.
events = getEvents(api_url_ipss, api_key_ipss)
# Raw (unparsed) data dictionary as exported by PyCap.
metadata_raw = project.export_metadata()
# Instrument-event mapping and repeating forms/events settings; the helpers
# receive the longitudinal/repeating flags computed above.
form_event_mapping = exportFormEventMapping(project, project_longitudinal)
repeating_forms_events = exportRepeatingFormsEvents(api_url_ipss, api_key_ipss,
                                                    project_repeating)
# Forms of the project, then a per-form repetition map derived from all of the
# structures loaded above.
forms = exportFormsOrdered(api_url_ipss, api_key_ipss)
form_repetition_map = createFormRepetitionMap(project_longitudinal,
                                              project_repeating,
                                              form_event_mapping,
                                              repeating_forms_events, forms)
# Parsed metadata built from the raw data dictionary plus the project
# structure gathered above.
metadata = parseMetadata(def_field, project_info, project_longitudinal,
                         project_repeating, events, metadata_raw,
                         form_event_mapping, repeating_forms_events, forms,
                         form_repetition_map)

# Export records for non-registry-only patients.
#records_arch = exportRecords(url_arch, key_arch, record_id_list=record_ids_arch, label=True)
records = exportRecords(api_url_ipss,
                        api_key_ipss,
                        record_id_list=record_ids,
Exemple #2
0
    
    
    # Load list of events for the project at position project_index.
    # Every *_list below is indexed by the same project_index, so entry i of
    # each list describes the same project.
    events_list[project_index] = getEvents(api_url_list[project_index], api_key_list[project_index])#project_list[project_index], project_info_list[project_index], project_longitudinal_list[project_index])


    # Load raw data dictionary.
    # NOTE: this slot temporarily holds the RAW metadata; it is passed to
    # parseMetadata() below and then overwritten with the parsed result.
    metadata_list[project_index] = project_list[project_index].export_metadata()


    # Load instrument-event mapping
    form_event_mapping_list[project_index] = exportFormEventMapping(project_list[project_index], project_longitudinal_list[project_index])


    # Load information specifying which forms are repeating.
    repeating_forms_events_list[project_index] = exportRepeatingFormsEvents(api_url_list[project_index], api_key_list[project_index], project_repeating_list[project_index])


    # Generate list of forms - list of dicts with two keys: 'instrument_label' and 'instrument_name'
    forms_list[project_index] = exportFormsOrdered(api_url_list[project_index], api_key_list[project_index])


    # Generate a dictionary with form_names as keys; each entry is a dict specifying in which
    # events the form is non-repeating, independently repeating, or dependently repeating.
    form_repetition_map_list[project_index] = createFormRepetitionMap(project_longitudinal_list[project_index], project_repeating_list[project_index], form_event_mapping_list[project_index], repeating_forms_events_list[project_index], forms_list[project_index])


    # Gather data about each variable.
    # The raw data dictionary stored in metadata_list[project_index] above is
    # consumed as an argument here and replaced by the parsed metadata.
    metadata_list[project_index] = parseMetadata(def_field_list[project_index], project_info_list[project_index], project_longitudinal_list[project_index], project_repeating_list[project_index], events_list[project_index], metadata_list[project_index], form_event_mapping_list[project_index], repeating_forms_events_list[project_index], forms_list[project_index], form_repetition_map_list[project_index])
    
    
Exemple #3
0
def _normalizeExportRecordsArgs(exportRecords_args):
    """Normalize, in place, the list-valued arguments passed to exportRecords.

    For each of "fields", "forms", "events" and "record_id_list" that is
    present in the dict: a value of Python None or the string 'None' is stored
    as Python None; any other value is replaced by a list containing str() of
    each of its items (the configured list might contain integers etc.).
    """
    for arg in ("fields", "forms", "events", "record_id_list"):
        if arg not in exportRecords_args:
            continue
        value = exportRecords_args[arg]
        # A real None must be handled here as well as the string 'None':
        # iterating over None in the list conversion below raises TypeError.
        if value is None or value == 'None':
            exportRecords_args[arg] = None
        else:
            exportRecords_args[arg] = [str(val) for val in value]


def _mapFormsToEvents(form_event_mapping):
    """Return a dict mapping each form name to the list of events containing it.

    Each entry of form_event_mapping is read through its 'form' and
    'unique_event_name' keys; events keep the order in which they appear.
    """
    form_to_events_dict = {}
    for form_event_entry in form_event_mapping:
        form = form_event_entry['form']
        event = form_event_entry['unique_event_name']
        form_to_events_dict.setdefault(form, []).append(event)
    return form_to_events_dict


def buildProjects(config):
    """Export data and settings for every project listed in the configuration.

    Args:
        config: dict with a "projects" entry, a list of project dicts. Each
            project dict must provide "code_name" and
            "options"["use_getIPSSIDs"], and may provide "exportRecords_args"
            and "getIPSSIDs_args".

    Returns:
        The same list of project dicts, each modified in place. Added keys:
        "api_url", "api_key", "chunks" (a one-element list holding the
        exported DataFrame), "pycap_project", "def_field", "project_info",
        "longitudinal", "repeating", "events", "form_event_mapping",
        "repeating_forms_events", "forms", "form_repetition_map", "metadata",
        "form_to_events_dict" (None for non-longitudinal projects),
        "primary_key", "primary_key_and_dag", "form_complete_fields" and
        "data_fields". "exportRecords_args" is normalized in place.
    """
    #### Read user's settings.yml file, which will be used to get API tokens and URLs.
    api_settings = ApiSettings()

    ## Build a list of "projects" - dicts which store data and settings for the project.
    projects = config["projects"]

    for project in projects:
        code_name = project["code_name"]

        ## Get args to pass to exportRecords. If the key does not exist, or
        ## it is not set to a value, use an empty dict.
        if project.get("exportRecords_args") is None:
            project["exportRecords_args"] = {}
        exportRecords_args = project["exportRecords_args"]

        # If use_getIPSSIDs is True, get list of record IDs to export.
        if project["options"]["use_getIPSSIDs"]:
            getIPSSIDs_args = project.get("getIPSSIDs_args")
            if getIPSSIDs_args is None:
                # No options provided: warn and export all record IDs.
                print("Warning: in project '" + code_name + "', 'use_getIPSSIDs' is True, but 'getIPSSIDs_args' not provided for project. Exporting all record IDs from project.")
                record_id_list = None
            else:
                record_id_list = getIPSSIDs(**getIPSSIDs_args)

            # A user-specified record_id_list is overridden; say so.
            if "record_id_list" in exportRecords_args:
                print("Warning: in project '" + code_name + "', the specified 'record_id_list' will be ignored, since 'use_getIPSSIDs' is True.")

            # Overwrite the record_id_list argument in exportRecords_args
            exportRecords_args["record_id_list"] = record_id_list

        # Convert exportRecords_args arguments to strings as needed.
        _normalizeExportRecordsArgs(exportRecords_args)

        ## Get API credentials for current project.
        api_url, api_key, code_name = api_settings.getApiCredentials(
            code_name=code_name)
        project["api_url"] = api_url
        project["api_key"] = api_key

        ## Export requested data for current project
        data_csv = exportRecords(api_url,
                                 api_key,
                                 format="csv",
                                 **exportRecords_args)
        # Every column is read as text, and missing values become ''.
        data_df = pandas.read_csv(StringIO(data_csv),
                                  dtype=unicode,
                                  encoding='utf-8').fillna('')

        # This list of dataframes will be broken into pieces, each piece
        # containing data to be placed in a different tab.
        project["chunks"] = [data_df]

        ## Retrieve project settings and add them to the dict for the current project
        pycap_project = redcap.Project(api_url, api_key)
        def_field = pycap_project.def_field
        project_info = exportProjectInfo(api_url, api_key)
        longitudinal = bool(project_info["is_longitudinal"])
        repeating = bool(project_info["has_repeating_instruments_or_events"])
        events = getEvents(api_url, api_key, quiet=True)
        metadata_raw = pycap_project.export_metadata()
        form_event_mapping = exportFormEventMapping(pycap_project,
                                                    longitudinal)
        repeating_forms_events = exportRepeatingFormsEvents(
            api_url, api_key, repeating)
        forms = exportFormsOrdered(api_url, api_key)
        form_repetition_map = createFormRepetitionMap(longitudinal, repeating,
                                                      form_event_mapping,
                                                      repeating_forms_events,
                                                      forms)
        metadata = parseMetadata(def_field,
                                 project_info,
                                 longitudinal,
                                 repeating,
                                 events,
                                 metadata_raw,
                                 form_event_mapping,
                                 repeating_forms_events,
                                 forms,
                                 form_repetition_map,
                                 write_branching_logic_function=False)

        project["pycap_project"] = pycap_project
        project["def_field"] = def_field
        project["project_info"] = project_info
        project["longitudinal"] = longitudinal
        project["repeating"] = repeating
        project["events"] = events
        project["form_event_mapping"] = form_event_mapping
        project["repeating_forms_events"] = repeating_forms_events
        project["forms"] = forms
        project["form_repetition_map"] = form_repetition_map
        project["metadata"] = metadata

        # Create dict which maps each form to a list of events containing that form.
        if longitudinal:
            project["form_to_events_dict"] = _mapFormsToEvents(
                form_event_mapping)
        else:
            project["form_to_events_dict"] = None

        ## Build lists of variables which appear in the export data.
        # columns which uniquely identify a row
        primary_key = [def_field]
        if longitudinal:
            primary_key.append("redcap_event_name")
        if repeating:
            primary_key.append("redcap_repeat_instrument")
            primary_key.append("redcap_repeat_instance")
        project["primary_key"] = primary_key

        # Copy the list: appending the DAG column to an alias would also add
        # it to project["primary_key"].
        primary_key_and_dag = list(primary_key)
        if "redcap_data_access_group" in data_df.columns:
            primary_key_and_dag.append("redcap_data_access_group")
        project["primary_key_and_dag"] = primary_key_and_dag

        # form_complete fields
        form_complete_fields = [
            field for field in data_df.columns
            if (field.endswith("_complete") and field not in metadata
                and field not in primary_key
                and field != "redcap_data_access_group")
        ]
        project["form_complete_fields"] = form_complete_fields

        # data fields: every remaining column. The exclusion set is built
        # once rather than concatenating two lists for each column.
        non_data_fields = set(primary_key)
        non_data_fields.update(form_complete_fields)
        non_data_fields.add("redcap_data_access_group")
        project["data_fields"] = [
            field for field in data_df.columns
            if field not in non_data_fields
        ]

    return projects
Exemple #4
0
def mainIntraProject(config_path):
    """Run the configured data checks against a single REDCap project.

    Reads the configuration at config_path, resolves the project's API
    credentials from its "code_name", creates the "out_dir" directory if it
    is missing, verifies that a "<check_name>.py" file exists next to this
    script for every entry of "checks", loads the project's settings,
    metadata and records, and passes everything to checkDriver().

    Args:
        config_path: passed to readConfig(). The resulting config is read
            through the keys "code_name", "out_dir", "checks",
            "use_getIPSSIDs" and, depending on the flags, "getIPSSIDs_args",
            "use_custom_record_id_list" and "record_id_list".

    Returns:
        None.

    Raises:
        Exception: if the file for one of the configured checks is missing.
    """
    config = readConfig(config_path)
    print("Performing checks with configuration:")
    pprint(config)
    print("")

    #### Read user's settings.yml file, which will be used to get API tokens and URLs.
    # ApiSettings is used to find the file containing API keys and URLs.
    api_settings = ApiSettings()

    # Determine the API URL and API token based on the users input and api_keys.yml file.
    code_name = config["code_name"]
    api_url, api_key, code_name = api_settings.getApiCredentials(
        code_name=code_name)

    # Create output directory if it does not exist.
    out_dir = config["out_dir"]
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
        print("Created directory: %s" % (out_dir,))

    # Names of the check modules to run; each must exist as
    # "<check_name>.py" in the directory containing this script.
    check_name_list = config["checks"]
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    for check_name in check_name_list:
        check_path = os.path.join(scriptdir, check_name + ".py")
        if not os.path.exists(check_path):
            raise Exception("Path does not exist: " + check_path)

    # Load REDCap project (a PyCap object).
    project = redcap.Project(api_url, api_key)

    # Get the field name of the unique identifying field (e.g. "ipssid").
    def_field = project.def_field

    # Load high-level project information.
    project_info = exportProjectInfo(api_url, api_key)
    project_longitudinal = bool(project_info["is_longitudinal"])
    project_repeating = bool(
        project_info["has_repeating_instruments_or_events"])

    # Load list of events
    events = getEvents(api_url, api_key)
    if events is not None:
        print("Review the event_ids below. These are required for generating links to problematic data in reports. If these are incorrect, or unset, you can set them in the event_ids.yml file specified in your settings.yml file. You can find the event_id associated with an event by accessing data from that event online, and looking at the value of 'event_id' in the address bar.")
        for event in events:
            event_id = events[event]["event_id"]
            if event_id is not None:
                # %-formatting rather than '+': concatenation raises
                # TypeError if the event_id is an int instead of a string.
                print(Color.green + ("%s %s" % (event, event_id)) + Color.end)
            else:
                print(Color.red + event + " " + 'None' + Color.end)
    print("")

    # Load raw data dictionary.
    metadata_raw = project.export_metadata()

    # Load instrument-event mapping
    form_event_mapping = exportFormEventMapping(project, project_longitudinal)

    # Load information specifying which forms are repeating.
    repeating_forms_events = exportRepeatingFormsEvents(
        api_url, api_key, project_repeating)

    # Generate list of forms - list of dicts with two keys: 'instrument_label' and 'instrument_name'
    forms = exportFormsOrdered(api_url, api_key)

    # Generate a dictionary with form_names as keys; each entry is a dict specifying in which
    # events the form is non-repeating, independently repeating, or dependently repeating.
    form_repetition_map = createFormRepetitionMap(project_longitudinal,
                                                  project_repeating,
                                                  form_event_mapping,
                                                  repeating_forms_events,
                                                  forms)

    # Gather data about each variable.
    metadata = parseMetadata(def_field, project_info, project_longitudinal,
                             project_repeating, events, metadata_raw,
                             form_event_mapping, repeating_forms_events, forms,
                             form_repetition_map)

    ## Load all records.
    # Record selection priority: getIPSSIDs, then a custom list from the
    # config, otherwise None is passed to exportRecords.
    if config["use_getIPSSIDs"]:
        record_id_list = getIPSSIDs(**config["getIPSSIDs_args"])
    elif config["use_custom_record_id_list"]:
        record_id_list = config["record_id_list"]
    else:
        record_id_list = None
    records = exportRecords(api_url, api_key, record_id_list)

    # Check for high-level issues in project settings, metadata, records.
    # 2020-05-11 - This script appears to check for bugged output of exportRecords.py, which has now been handled in exportRecords.py.
    #    project_compatible = isProjectCompatible(metadata, records, def_field)
    #    if (not project_compatible):
    #        raise Exception("Error found in records or metadata. Review output above.")

    # Generate a dictionary with record IDs as keys and a list of row numbers corresponding to that record as values.
    record_id_map = createRecordIDMap(def_field, records)

    # Generate a list of data access groups if they exist.
    dags_used, dags = getDAGs(records)

    # Generate a dictionary containing information about each dag (e.g. number of records they contain).
    dag_record_map = createDAGRecordMap(def_field, records, record_id_map,
                                        dags_used, dags)

    # Generate list of checks to perform (default & user-defined).
    checklist = createChecklist(check_name_list)

    # Perform checks on data and report issues. The return value is not
    # needed here: per the original author's note, the check results are
    # saved by checkDriver() itself.
    checkDriver(checklist, out_dir, def_field, forms, project_info,
                project_longitudinal, project_repeating, events, metadata,
                form_event_mapping, repeating_forms_events,
                form_repetition_map, records, record_id_map, dags_used, dags,
                dag_record_map)

    #    # Save data exported from REDCap and generated in this script.
    #    saveData(out_dir, project, forms, project_info, metadata, record_id_map, dags_used, dags, check_results)