Example 1
#        records_list[project_index] = exportRecords(api_url_list[project_index], api_key_list[project_index], record_id_list=list(record_id_map_list[0])) # USED BEFORE REVERSING ORDER OF PROJECT DATA RETRIEVAL
        # Only pull record IDs from the first project that exist in the second project.
        records_list[project_index] = exportRecords(api_url_list[project_index], api_key_list[project_index], record_id_list=list(record_id_map_list[1])) # USED AFTER REVERSING ORDER OF PROJECT DATA RETRIEVAL
    
    # Check for high-level issues in project settings, metadata, records.
    project_compatible_list[project_index] = isProjectCompatible(metadata_list[project_index], records_list[project_index], def_field_list[project_index])
    if (not project_compatible_list[project_index]):
        sys.exit("Error found in records or metadata. Review output above.")
    
    
    # Generate a dictionary with record IDs as keys and a list of row numbers corresponding to that record as values.
    record_id_map_list[project_index] = createRecordIDMap(def_field_list[project_index], records_list[project_index])
    
    
    # Generate a list of data access groups if they exist.
    dags_used_list[project_index], dags_list[project_index] = getDAGs(records_list[project_index])
    
    
    # Generate a dictionary containing information about each DAG (e.g. the number of records it contains).
    dag_record_map_list[project_index] = createDAGRecordMap(def_field_list[project_index], records_list[project_index], record_id_map_list[project_index], dags_used_list[project_index], dags_list[project_index])

# Check for records that only appear in the first project.
for record_id in record_id_map_list[0]:
    if (not record_id in record_id_map_list[1]):
        print "Record: "+record_id+" found in first project but not in second project. This will probably cause errors."

# Generate list of checks to perform (default & user-defined).
checklist = createChecklist(check_name_list)

# Perform checks on data and report issues.
check_results = checkDriverInterProject(checklist, out_dir, def_field_list, forms_list, project_info_list, project_longitudinal_list, project_repeating_list, events_list, metadata_list, form_event_mapping_list, repeating_forms_events_list, form_repetition_map_list, records_list, record_id_map_list, dags_used_list, dags_list, dag_record_map_list)
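
# For reference, a minimal sketch of what the createRecordIDMap helper used above
# might look like, based on its description in this repository ("a dictionary with
# record IDs as keys and a list of row numbers corresponding to that record as
# values"); the actual implementation may differ.
def createRecordIDMap(def_field, records):
    record_id_map = {}
    for row_num, row in enumerate(records):
        # def_field is the name of the unique identifying field (e.g. "ipssid").
        record_id_map.setdefault(row[def_field], []).append(row_num)
    return record_id_map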
Example 2
def reportPatientInfo(patient_info, out_dir, path_dag_info):
    ## Miscellaneous items used in all of the enrolment reports
    min_year = 2003
    max_year = 2020
    year_list = range(min_year, max_year + 1)

    records_ipss = exportRecords(url_ipss, key_ipss, fields=["ipssid"])
    dags = getDAGs(records_ipss)[1]
    # Put "Unassigned" at end of list.
    dags_old = dags
    dags = sorted(dags_old)[1:]
    dags.extend(sorted(dags_old)[:1])

    # Check if all records belong to one of the DAGs in the list just created.
    for record_id, record in patient_info.iteritems():
        if (not record["dag"] in dags):
            print "Record with ID", record_id, "in DAG", record["dag"], "is part of unidentified DAG."

    ## Enrolment by site per year
    report_path = os.path.join(out_dir, "enrolment_dag.csv")

    # Define row and column headings.
    columns = year_list
    index = [dag if (dag != "") else "Unassigned" for dag in dags]

    # Create pandas DataFrame to store report.
    report_df = pandas.DataFrame(columns=columns, index=index)

    # Add row for each DAG.
    for dag in dags:
        if (dag != ""):
            dag_name = dag
        else:
            dag_name = "Unassigned"
        for year in year_list:
            num_enrolled_dag_year = 0
            for record_id, record in patient_info.iteritems():
                if ("enroll_date" in record) and (type(record["enroll_date"]) != type(year)):
                    print "WARNING: comparison of different types in 'enroll_date'."
                if (record["dag"] == dag) and ("enroll_date" in record) and (record["enroll_date"] == year):
                    num_enrolled_dag_year += 1
            report_df[year][dag_name] = num_enrolled_dag_year

    # Add columns/rows to store column/row totals.
    report_df["Total"] = report_df.sum(axis=1).astype(int)  # Total column
    report_df = report_df.append(
        report_df.sum(axis=0).astype(int).rename("Total"))  # Total row

    # Add institution name and country columns to dataframe.
    report_df = addDAGInfo(report_df, path_dag_info)

    report_df.to_csv(report_path)
    print report_df

    ## Enrolment by stroke type per year
    report_path = os.path.join(out_dir, "enrolment_stroke_type.csv")

    # Define row and column headings.
    columns = year_list
    index = [
        "Neonatal AIS", "Neonatal CSVT", "Neonatal AIS & CSVT",
        "Childhood AIS", "Childhood CSVT", "Childhood AIS & CSVT",
        "Presumed perinatal AIS", "Presumed perinatal CSVT",
        "Presumed perinatal AIS & CSVT", "Presumed perinatal VI",
        "Arteriopathy", "Other"
    ]

    report_df = pandas.DataFrame(0, columns=columns, index=index)

    # Add each patient with known stroke type to report.
    for record_id, record in patient_info.iteritems():
        # Only count patients whose enrolment date is known and included in the report.
        if ("enroll_date" in record) and (record["enroll_date"] in columns):
            year = record["enroll_date"]
            if (record["stroke_type"]["neo_ais"] == "1") and (record["stroke_type"]["neo_csvt"] == "1"):
                report_df[year]["Neonatal AIS & CSVT"] += 1
            elif (record["stroke_type"]["neo_ais"] == "1"):
                report_df[year]["Neonatal AIS"] += 1
            elif (record["stroke_type"]["neo_csvt"] == "1"):
                report_df[year]["Neonatal CSVT"] += 1
            elif (record["stroke_type"]["child_ais"] == "1") and (record["stroke_type"]["child_csvt"] == "1"):
                report_df[year]["Childhood AIS & CSVT"] += 1
            elif (record["stroke_type"]["child_ais"] == "1"):
                report_df[year]["Childhood AIS"] += 1
            elif (record["stroke_type"]["child_csvt"] == "1"):
                report_df[year]["Childhood CSVT"] += 1
            elif (record["stroke_type"]["pp_ais"] == "1") and (record["stroke_type"]["pp_csvt"] == "1"):
                report_df[year]["Presumed perinatal AIS & CSVT"] += 1
            elif (record["stroke_type"]["pp_ais"] == "1"):
                report_df[year]["Presumed perinatal AIS"] += 1
            elif (record["stroke_type"]["pp_csvt"] == "1"):
                report_df[year]["Presumed perinatal CSVT"] += 1
            elif (record["stroke_type"]["pp_vi"] == "1"):
                report_df[year]["Presumed perinatal VI"] += 1
            elif (record["stroke_type"]["art"] == "1"):
                report_df[year]["Arteriopathy"] += 1
            elif (record["stroke_type"]["other"] == "1"):
                report_df[year]["Other"] += 1

    report_df["Total"] = report_df.sum(axis=1).astype(int)  # Total column
    report_df = report_df.append(
        report_df.sum(axis=0).astype(int).rename("Total"))  # Total row
    report_df.to_csv(report_path)
    print report_df

    return
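
# A hedged sketch of the addDAGInfo helper called above. The real helper is not
# shown in this excerpt; this sketch assumes path_dag_info points to a CSV indexed
# by DAG name with "institution" and "country" columns, though the actual file
# layout may differ.
def addDAGInfo(report_df, path_dag_info):
    dag_info = pandas.read_csv(path_dag_info, index_col=0)
    # Left-join so rows without a matching DAG (e.g. the "Total" row) are kept.
    return report_df.join(dag_info[["institution", "country"]], how="left")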
Example 3
def mainIntraProject(config_path):
    config = readConfig(config_path)
    print "Performing checks with configuration:"
    pprint(config)
    print

    #### Read user's settings.yml file, which will be used to get API tokens and URLs.
    # Create an instance of the ApiSettings class; use it to find the file containing API keys and URLs.
    api_settings = ApiSettings()

    # Determine the API URL and API token based on the user's input and the api_keys.yml file.
    code_name = config["code_name"]
    api_url, api_key, code_name = api_settings.getApiCredentials(code_name=code_name)

    # Create output directory if it does not exist.
    out_dir = config["out_dir"]
    if (not os.path.isdir(out_dir)):
        os.mkdir(out_dir)
        print "Created directory:", out_dir

    # Define the list of check names; each corresponds to a script defining Check objects (see Check.py).
    check_name_list = config["checks"]

    # Verify that a script exists for each requested check.
    scriptdir = os.path.dirname(os.path.realpath(__file__))
    for check_name in check_name_list:
        check_path = os.path.join(scriptdir, check_name + ".py")
        if not os.path.exists(check_path):
            raise Exception("Path does not exist: " + check_path)

    # Load REDCap project (a PyCap object).
    project = redcap.Project(api_url, api_key)

    # Get the field name of the unique identifying field (e.g. "ipssid").
    def_field = project.def_field

    # Load high-level project information.
    project_info = exportProjectInfo(api_url, api_key)
    project_longitudinal = bool(project_info["is_longitudinal"])
    project_repeating = bool(project_info["has_repeating_instruments_or_events"])

    # Load the list of events.
    events = getEvents(api_url, api_key)
    if (events is not None):
        print "Review the event_ids below. These are required for generating links to problematic data in reports. If these are incorrect or unset, you can set them in the event_ids.yml file specified in your settings.yml file. You can find the event_id associated with an event by accessing data from that event online and looking at the value of 'event_id' in the address bar."
        for event in events:
            event_id = events[event]["event_id"]
            if (event_id is not None):
                print Color.green + event + " " + event_id + Color.end
            else:
                print Color.red + event + " " + 'None' + Color.end
    print

    # Load raw data dictionary.
    metadata_raw = project.export_metadata()

    # Load instrument-event mapping
    form_event_mapping = exportFormEventMapping(project, project_longitudinal)

    # Load information specifying which forms are repeating.
    repeating_forms_events = exportRepeatingFormsEvents(
        api_url, api_key, project_repeating)

    # Generate list of forms - list of dicts with two keys: 'instrument_label' and 'instrument_name'
    forms = exportFormsOrdered(api_url, api_key)

    # Generate a dictionary with form_names as keys; each entry is a dict specifying in which
    # events the form is non-repeating, independently repeating, or dependently repeating.
    form_repetition_map = createFormRepetitionMap(project_longitudinal,
                                                  project_repeating,
                                                  form_event_mapping,
                                                  repeating_forms_events,
                                                  forms)

    # Gather data about each variable.
    metadata = parseMetadata(def_field, project_info, project_longitudinal,
                             project_repeating, events, metadata_raw,
                             form_event_mapping, repeating_forms_events, forms,
                             form_repetition_map)

    ## Load all records.
    if config["use_getIPSSIDs"]:
        getIPSSIDs_args = config["getIPSSIDs_args"]
        record_id_list = getIPSSIDs(**getIPSSIDs_args)
    elif config["use_custom_record_id_list"]:
        record_id_list = config["record_id_list"]
    else:
        record_id_list = None
    records = exportRecords(api_url, api_key, record_id_list)

    # Check for high-level issues in project settings, metadata, records.
    # 2020-05-11 - This script appears to check for bugged output of exportRecords.py, which has now been handled in exportRecords.py.
    #    project_compatible = isProjectCompatible(metadata, records, def_field)
    #    if (not project_compatible):
    #        raise Exception("Error found in records or metadata. Review output above.")

    # Generate a dictionary with record IDs as keys and a list of row numbers corresponding to that record as values.
    record_id_map = createRecordIDMap(def_field, records)

    # Generate a list of data access groups if they exist.
    dags_used, dags = getDAGs(records)

    # Generate a dictionary containing information about each dag (e.g. number of records they contain).
    dag_record_map = createDAGRecordMap(def_field, records, record_id_map,
                                        dags_used, dags)

    # Generate list of checks to perform (default & user-defined).
    checklist = createChecklist(check_name_list)

    # Perform checks on data and report issues.
    check_results = checkDriver(checklist, out_dir, def_field, forms,
                                project_info, project_longitudinal,
                                project_repeating, events, metadata,
                                form_event_mapping, repeating_forms_events,
                                form_repetition_map, records, record_id_map,
                                dags_used, dags, dag_record_map)

    #    # Save data exported from REDCap and generated in this script. The check results are saved by checkDriver() above.
    #    saveData(out_dir, project, forms, project_info, metadata, record_id_map, dags_used, dags, check_results)
    return
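
# For reference, a hypothetical configuration file for mainIntraProject. The key
# names are taken from the config lookups above; the YAML layout and the example
# values are assumptions.
#
#   code_name: my_project            # project identifier looked up via api_keys.yml
#   out_dir: /path/to/output
#   checks:                          # names of check scripts located next to this script
#     - my_custom_check
#   use_getIPSSIDs: false
#   getIPSSIDs_args: {}
#   use_custom_record_id_list: false
#   record_id_list: []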