    # records_list[project_index] = exportRecords(api_url_list[project_index], api_key_list[project_index], record_id_list=[record_id for record_id in record_id_map_list[0]]) # USED BEFORE REVERSING ORDER OF PROJECT DATA RETRIEVAL

    # Only pull record IDs from the first project that exist in the second project.
    records_list[project_index] = exportRecords(api_url_list[project_index], api_key_list[project_index], record_id_list=[record_id for record_id in record_id_map_list[1]]) # USED AFTER REVERSING ORDER OF PROJECT DATA RETRIEVAL

    # Check for high-level issues in project settings, metadata, records.
    project_compatible_list[project_index] = isProjectCompatible(metadata_list[project_index], records_list[project_index], def_field_list[project_index])
    if (not project_compatible_list[project_index]):
        sys.exit()

    # Generate a non-redundant list of record IDs.
    record_id_map_list[project_index] = createRecordIDMap(def_field_list[project_index], records_list[project_index])

    # Generate a list of data access groups if they exist.
    dags_used_list[project_index], dags_list[project_index] = getDAGs(records_list[project_index])

    # Generate a dictionary containing information about each DAG (e.g. number of records they contain).
    dag_record_map_list[project_index] = createDAGRecordMap(def_field_list[project_index], records_list[project_index], record_id_map_list[project_index], dags_used_list[project_index], dags_list[project_index])

# Check for records that only appear in the first project.
for record_id in record_id_map_list[0]:
    if (not record_id in record_id_map_list[1]):
        print "Record: "+record_id+" found in first project but not in second project. This will probably cause errors."

# Generate list of checks to perform (default & user-defined).
checklist = createChecklist(check_name_list)

# Perform checks on data and report issues.
check_results = checkDriverInterProject(checklist, out_dir, def_field_list, forms_list, project_info_list, project_longitudinal_list, project_repeating_list, events_list, metadata_list, form_event_mapping_list, repeating_forms_events_list, form_repetition_map_list, records_list, record_id_map_list, dags_used_list, dags_list, dag_record_map_list)
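# NOTE: The overlap check above relies on createRecordIDMap() returning a
# dictionary keyed by record ID (with row numbers as values, per the comment in
# mainIntraProject below), so `record_id in record_id_map_list[1]` is a fast
# membership test. A minimal sketch of that structure (hypothetical IDs, not
# taken from real data):
#
#   record_id_map_list[0] = {"1001": [0, 1], "1002": [2]}  # first project
#   record_id_map_list[1] = {"1001": [0], "1003": [1]}     # second project
#
# With these values, "1002" would be reported as present in the first project
# but missing from the second.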
def reportPatientInfo(patient_info, out_dir, path_dag_info):
    ## Miscellaneous items used in all of the enrolment reports
    min_year = 2003
    max_year = 2020 #2019
    year_list = range(min_year, max_year + 1)

    records_ipss = exportRecords(url_ipss, key_ipss, fields=["ipssid"])
    dags = getDAGs(records_ipss)[1]

    # Put "Unassigned" at end of list.
    dags_old = dags
    dags = sorted(dags_old)[1:]
    dags.extend(sorted(dags_old)[:1])

    # Check if all records belong to one of the DAGs in the list just created.
    for record_id, record in patient_info.iteritems():
        if (not record["dag"] in dags):
            print "Record with ID", record_id, "in DAG", record["dag"], "is part of unidentified DAG."

    ## Enrolment by site per year
    report_path = os.path.join(out_dir, "enrolment_dag.csv")

    # Write row/column headings.
    columns = year_list
    index = [dag if (dag != "") else "Unassigned" for dag in dags]

    # Create pandas DataFrame to store report.
    report_df = pandas.DataFrame(columns=columns, index=index)

    # Add row for each DAG.
    for dag in dags:
        if (dag != ""):
            dag_name = dag
        else:
            dag_name = "Unassigned"
        for year in year_list:
            num_enrolled_dag_year = 0
            for record_id, record in patient_info.iteritems():
                if ("enroll_date" in record) and (type(record["enroll_date"]) != type(year)):
                    print "WARNING: comparison of different types in 'enroll_date'."
                if (record["dag"] == dag) and ("enroll_date" in record) and (record["enroll_date"] == year):
                    num_enrolled_dag_year += 1
            report_df[year][dag_name] = num_enrolled_dag_year

    # Add columns/rows to store column/row totals.
    report_df["Total"] = report_df.sum(axis=1).astype(int) # Total column
    report_df = report_df.append(report_df.sum(axis=0).astype(int).rename("Total")) # Total row

    # Add institution name and country columns to dataframe.
    report_df = addDAGInfo(report_df, path_dag_info)

    report_df.to_csv(report_path)
    print report_df

    ## Enrolment by stroke type per year
    report_path = os.path.join(out_dir, "enrolment_stroke_type.csv")

    # Write row/column headings.
    columns = year_list
    index = ["Neonatal AIS", "Neonatal CSVT", "Neonatal AIS & CSVT", "Childhood AIS", "Childhood CSVT", "Childhood AIS & CSVT", "Presumed perinatal AIS", "Presumed perinatal CSVT", "Presumed perinatal AIS & CSVT", "Presumed perinatal VI", "Arteriopathy", "Other"]
    report_df = pandas.DataFrame(0, columns=columns, index=index)

    # Add each patient with known stroke type to report.
    for id, record in patient_info.iteritems():
        if ("enroll_date" in record) and (record["enroll_date"] in columns): # If enrolment date is known and included in the report.
            year = record["enroll_date"]
            if (record["stroke_type"]["neo_ais"] == "1") and (record["stroke_type"]["neo_csvt"] == "1"):
                report_df[year]["Neonatal AIS & CSVT"] += 1
            elif (record["stroke_type"]["neo_ais"] == "1"):
                report_df[year]["Neonatal AIS"] += 1
            elif (record["stroke_type"]["neo_csvt"] == "1"):
                report_df[year]["Neonatal CSVT"] += 1
            elif (record["stroke_type"]["child_ais"] == "1") and (record["stroke_type"]["child_csvt"] == "1"):
                report_df[year]["Childhood AIS & CSVT"] += 1
            elif (record["stroke_type"]["child_ais"] == "1"):
                report_df[year]["Childhood AIS"] += 1
            elif (record["stroke_type"]["child_csvt"] == "1"):
                report_df[year]["Childhood CSVT"] += 1
            elif (record["stroke_type"]["pp_ais"] == "1") and (record["stroke_type"]["pp_csvt"] == "1"):
                report_df[year]["Presumed perinatal AIS & CSVT"] += 1
            elif (record["stroke_type"]["pp_ais"] == "1"):
                report_df[year]["Presumed perinatal AIS"] += 1
            elif (record["stroke_type"]["pp_csvt"] == "1"):
                report_df[year]["Presumed perinatal CSVT"] += 1
            elif (record["stroke_type"]["pp_vi"] == "1"):
                report_df[year]["Presumed perinatal VI"] += 1
            elif (record["stroke_type"]["art"] == "1"):
                report_df[year]["Arteriopathy"] += 1
            elif (record["stroke_type"]["other"] == "1"):
                report_df[year]["Other"] += 1

    # Add columns/rows to store column/row totals.
    report_df["Total"] = report_df.sum(axis=1).astype(int) # Total column
    report_df = report_df.append(report_df.sum(axis=0).astype(int).rename("Total")) # Total row

    report_df.to_csv(report_path)
    print report_df
    return
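# A minimal usage sketch for reportPatientInfo(). The shape of patient_info is
# inferred from the lookups above ("dag", "enroll_date", "stroke_type"); the
# IDs, paths, and values below are hypothetical:
#
#   patient_info = {
#       "1001": {"dag": "site_a", "enroll_date": 2015,
#                "stroke_type": {"neo_ais": "1", "neo_csvt": "0", ...}},
#   }
#   reportPatientInfo(patient_info, "reports", "dag_info.csv")
#
# Note that enroll_date must be an int in year_list for a record to be counted;
# the type-mismatch warning in the site report fires when it is not.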
def mainIntraProject(config_path):
    config = readConfig(config_path)

    print "Performing checks with configuration:"
    pprint(config)
    print

    #### Read user's settings.yml file, which will be used to get API tokens and URLs.
    api_settings = ApiSettings() # Create instance of ApiSettings class. Use this to find file containing API keys and URLs.

    # Determine the API URL and API token based on the user's input and the api_keys.yml file.
    code_name = config["code_name"]
    api_url, api_key, code_name = api_settings.getApiCredentials(code_name=code_name)

    # Create output directory if it does not exist.
    out_dir = config["out_dir"]
    if (not os.path.isdir(out_dir)):
        os.mkdir(out_dir)
        print "Created directory:", out_dir

    # Define a list containing the lists of Check objects (defined in Check.py).
    check_name_list = config["checks"]
    check_paths_exist = True
    for check_name in check_name_list:
        scriptdir = os.path.dirname(os.path.realpath(__file__))
        check_path = os.path.join(scriptdir, check_name + ".py")
        if not os.path.exists(check_path):
            raise Exception("Path does not exist:", check_path)

    # Load REDCap project (a PyCap object).
    project = redcap.Project(api_url, api_key)

    # Get the field name of the unique identifying field (e.g. "ipssid").
    def_field = project.def_field

    # Load high-level project information.
    project_info = exportProjectInfo(api_url, api_key)
    project_longitudinal = bool(project_info["is_longitudinal"])
    project_repeating = bool(project_info["has_repeating_instruments_or_events"])

    # Load list of events.
    events = getEvents(api_url, api_key) #project, project_info, project_longitudinal)
    if (not events == None):
        print "Review the event_ids below. These are required for generating links to problematic data in reports. If these are incorrect or unset, you can set them in the event_ids.yml file specified in your settings.yml file. You can find the event_id associated with an event by accessing data from that event online, and looking at the value of 'event_id' in the address bar."
        for event in events:
            event_id = events[event]["event_id"]
            if (not event_id == None):
                print Color.green + event + " " + event_id + Color.end
            else:
                print Color.red + event + " " + 'None' + Color.end
        print

    # Load raw data dictionary.
    metadata_raw = project.export_metadata()

    # Load instrument-event mapping.
    form_event_mapping = exportFormEventMapping(project, project_longitudinal)

    # Load information specifying which forms are repeating.
    repeating_forms_events = exportRepeatingFormsEvents(api_url, api_key, project_repeating)

    # Generate list of forms - list of dicts with two keys: 'instrument_label' and 'instrument_name'.
    forms = exportFormsOrdered(api_url, api_key)

    # Generate a dictionary with form_names as keys; each entry is a dict specifying in which
    # events the form is non-repeating, independently repeating, or dependently repeating.
    form_repetition_map = createFormRepetitionMap(project_longitudinal, project_repeating, form_event_mapping, repeating_forms_events, forms)

    # Gather data about each variable.
    metadata = parseMetadata(def_field, project_info, project_longitudinal, project_repeating, events, metadata_raw, form_event_mapping, repeating_forms_events, forms, form_repetition_map)

    ## Load all records.
    if config["use_getIPSSIDs"]:
        getIPSSIDs_args = config["getIPSSIDs_args"]
        record_id_list = getIPSSIDs(**getIPSSIDs_args)
    elif config["use_custom_record_id_list"]:
        record_id_list = config["record_id_list"]
    else:
        record_id_list = None
    records = exportRecords(api_url, api_key, record_id_list)

    # Check for high-level issues in project settings, metadata, records.
    # 2020-05-11 - This script appears to check for bugged output of exportRecords.py, which has now been handled in exportRecords.py.
    # project_compatible = isProjectCompatible(metadata, records, def_field)
    # if (not project_compatible):
    #     raise Exception("Error found in records or metadata. Review output above.")

    # Generate a dictionary with record IDs as keys and a list of row numbers corresponding to that record as values.
    record_id_map = createRecordIDMap(def_field, records)

    # Generate a list of data access groups if they exist.
    dags_used, dags = getDAGs(records)

    # Generate a dictionary containing information about each DAG (e.g. number of records they contain).
    dag_record_map = createDAGRecordMap(def_field, records, record_id_map, dags_used, dags)

    # Generate list of checks to perform (default & user-defined).
    checklist = createChecklist(check_name_list)

    # Perform checks on data and report issues.
    check_results = checkDriver(checklist, out_dir, def_field, forms, project_info, project_longitudinal, project_repeating, events, metadata, form_event_mapping, repeating_forms_events, form_repetition_map, records, record_id_map, dags_used, dags, dag_record_map)

    # # Save data exported from REDCap and generated in this script. The check results are saved by checkDriver() above.
    # saveData(out_dir, project, forms, project_info, metadata, record_id_map, dags_used, dags, check_results)
    return
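# A minimal sketch of a configuration file mainIntraProject() could consume,
# inferred from the keys read above (all values are hypothetical):
#
#   code_name: "ipss"
#   out_dir: "output/intra_project_checks"
#   checks: ["check_missing_data"]
#   use_getIPSSIDs: False
#   getIPSSIDs_args: {}
#   use_custom_record_id_list: True
#   record_id_list: ["1001", "1002"]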