def main():
    """Generate the per-site subject map input (smi) files from REDCap data.

    Reads ``settings.ini`` from the configuration directory given on the
    command line, checks that the ``xml_formatting_transform_xsl`` setting
    names an existing file, fetches the REDCap data, runs it through three
    XSL transformations, moves every ``start_date`` back by 365 days, writes
    one ``smi<site_code>.xml`` file per top-level element of the result and
    hands those files to ``parse_site_details_and_send``.

    When the ``debug`` argument is truthy, the raw REDCap response is also
    written to a log file and error details are printed on failure.

    Raises:
        gsm_lib.ConfigurationError: ``xml_formatting_transform_xsl`` is
            missing, empty, or names a file that does not exist.
        SystemExit: with status 1 when processing the REDCap response fails.
    """
    # obtaining command line arguments for path to config directory
    args = parse_args()
    configuration_directory = os.path.abspath(
        args['configuration_directory_path'])
    # Falsy or absent debug values are normalised to False.
    debugging = args['debug'] if 'debug' in args and args['debug'] else False

    # Configure logging
    logger = configure_logging(args['verbose'], args['logfile'])

    conf_file = os.path.join(configuration_directory, 'settings.ini')
    settings = gsm_lib.get_settings(conf_file)
    gsm_lib.read_config(configuration_directory, conf_file, settings)

    # The raw-response log file is only needed in debug mode.
    redcap_log_file = None
    if debugging:
        if settings.redcap_log_file:
            redcap_log_file = os.path.join(configuration_directory,
                                           settings.redcap_log_file)
        else:
            redcap_log_file = 'redcap.log.xml'

    # Check if xml_formatting_transform.xsl file is present/properly set in
    # setting.ini
    message2 = "Please set it with appropriate value and restart execution. " \
        "For assistance refer config-example-gsm-input/settings.ini." \
        "\nProgram will now terminate..."

    if not settings.hasoption('xml_formatting_transform_xsl'):
        message = "Required parameter xml_formatting_transform_xsl is missing " \
            "in settings.ini. " + message2
        logger.error(message)
        raise gsm_lib.ConfigurationError(message)

    if settings.xml_formatting_transform_xsl == "":
        message = "Required parameter xml_formatting_transform_xsl does not " \
            "have a value in settings.ini. " + message2
        logger.error(message)
        raise gsm_lib.ConfigurationError(message)

    # Resolved once here and reused for transformation 1 below.
    transform_xsl = os.path.join(configuration_directory,
                                 settings.xml_formatting_transform_xsl)
    if not os.path.exists(transform_xsl):
        message = "Required file xml_formatting_transform.xsl does not exist " \
            "in {0}. Please make sure this file is included in the " \
            "configuration directory and restart execution. For " \
            "assistance refer config-example-gsm-input/xml_formatting_transform.xsl." \
            "\nProgram will now terminate...".format(configuration_directory)
        logger.error(message)
        raise gsm_lib.ConfigurationError(message)

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory
    properties = rt.init_redcap_interface(settings, logger)

    # get data from the redcap for the fields listed in the
    # source_data_schema.xml
    response = rt.get_data_from_redcap(properties, logger)

    if debugging:
        print('Writing REDCap response to file: ')
        try:
            with open(redcap_log_file, 'w') as log_file:
                log_file.write(response)
        except Exception:
            # The debug dump is best effort and must not abort the run, but
            # KeyboardInterrupt/SystemExit are allowed to propagate.
            logger.exception('Unable to write the REDCap response to %s',
                             redcap_log_file)
            print('Unable to write the REDCap response to ' + redcap_log_file)

    try:
        logger.debug(response)
        xml_tree = etree.fromstring(response)

        # XSL Transformation 1: This transformation removes junk data, rename
        # elements and extracts site_id and adds new tag site_id
        logger.debug("Beginning XSL Transformation 1: This transformation removes junk data, rename elements and extracts site_id and adds new tag site_id")
        xslt = etree.parse(transform_xsl)
        transform = etree.XSLT(xslt)
        xml_transformed = transform(xml_tree)
        logger.debug("XSL Transformation 1 completed.")

        # XSL Transformation 2: This transformation groups the data based on
        # site_id
        logger.debug("Beginning XSL Transformation 2: This transformation groups the data based on site_id")
        transform2_xsl = proj_root + 'rsm/utils/groupby_siteid_transform.xsl'
        xslt = etree.parse(transform2_xsl)
        transform = etree.XSLT(xslt)
        xml_transformed2 = transform(xml_transformed)
        logger.debug("XSL Transformation 2 completed.")

        # XSL Transformation 3: This transformation removes all the nodes
        # which are not set
        logger.debug("Beginning XSL Transformation 3: This transformation removes all the nodes which are not set")
        transform3_xsl = proj_root + 'rsm/utils/remove_junktags_transform.xsl'
        xslt = etree.parse(transform3_xsl)
        transform = etree.XSLT(xslt)
        xml_transformed3 = transform(xml_transformed2)
        logger.debug("XSL Transformation 3 completed.")

        # Prettifying the output generated by XSL Transformation
        xml_str2 = etree.tostring(xml_transformed3, method='xml',
                                  pretty_print=True)
        tree = etree.fromstring(xml_str2,
                                etree.XMLParser(remove_blank_text=True))

        # Loop through the start_date elements and update their values
        for k in tree.iter('start_date'):
            d = datetime.datetime.strptime(k.text, "%Y-%m-%d").date() \
                - timedelta(days=365)
            k.text = str(d)

        # writing data to smi+site_code.xml. This xml will be saved to sftp
        # of the site as smi.xml
        do_keep_gen_files = args['keep']
        tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)
        subject_map_input = {}
        for k in tree:
            site_code = k.attrib['id']
            file_name = tmp_folder + 'smi' + site_code + '.xml'
            gsm_lib.write_element_tree_to_file(ET.ElementTree(k), file_name)
            subject_map_input[site_code] = file_name

        site_catalog_file = os.path.join(configuration_directory,
                                         settings.site_catalog)
        parse_site_details_and_send(site_catalog_file, subject_map_input,
                                    logger, settings, do_keep_gen_files)
    except Exception as xe:
        # Keep the traceback in the log even when not in debug mode, and
        # report the failure through a non-zero exit status.
        logger.exception('Unable to transform data returned by REDCap')
        print('Unable to transform data returned by REDCap:')
        if debugging:
            print('REDCap response logged to ' + redcap_log_file)
            print('Ecountered the following error:')
            print(xe)
        raise SystemExit(1)
def main():
    """Merge the person index with smi.xml and deliver the subject map.

    Fetches the person index from REDCap and transforms it with
    ``person_index_transform.xsl``, obtains the smi file path from
    ``get_smi_and_parse`` and joins the two data sets on
    ``research_subject_id``:

    * items whose ``yob`` equals the person index ``yob`` get ``mrn`` and
      ``facility_code`` added, lose their ``yob`` element and are written to
      ``subject_map.csv``, which is sent with the ``'sftp'`` option;
    * items whose ``yob`` differs are written to
      ``subject_map_exceptions.csv``, which is sent with the ``'email'``
      option.

    The generated CSV files are removed afterwards unless the ``keep``
    argument is truthy.  Sets the module-level ``configuration_directory``.

    Raises:
        IOError: the smi file or one of the CSV files cannot be opened.
        OSError: the smi file cannot be removed.
    """
    global configuration_directory

    # obtaining command line arguments for path to config directory
    args = parse_args()
    configuration_directory = os.path.abspath(
        args['configuration_directory_path'])
    do_keep_gen_files = args['keep']

    # Configure logging
    logger = configure_logging(args['verbose'], args['logfile'])

    conf_file = os.path.join(configuration_directory, 'settings.ini')
    settings = gsm_lib.get_settings(conf_file)
    gsm_lib.read_config(configuration_directory, conf_file, settings)
    site_catalog_file = os.path.join(configuration_directory,
                                     settings.site_catalog)

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory
    properties = rt.init_redcap_interface(settings, logger)

    # gets data from the person index for the fields listed in the
    # source_data_schema.xml
    response = rt.get_data_from_redcap(properties, logger)
    xml_tree = etree.fromstring(response)

    # XSL Transformation : transforms the person_index data
    transform_xsl = proj_root + "bin/utils/person_index_transform.xsl"
    xslt = etree.parse(transform_xsl)
    transform = etree.XSLT(xslt)
    person_index_data = transform(xml_tree)

    # retrieve smi.xml from the sftp server
    smi_path = get_smi_and_parse(site_catalog_file, settings, logger)
    try:
        smi_data = etree.parse(smi_path)
    except IOError:
        logger.exception("Could not open file: " + smi_path)
        raise

    # sorting both the xml files.
    gsm_lib.sort_element_tree(smi_data)
    gsm_lib.sort_element_tree(person_index_data)

    # generating the person index dictionary:
    # research_subject_id -> [yob, mrn, facility_code]
    person_index_dict = {}
    for item in person_index_data.iter('item'):
        person_index_dict[item.findtext('research_subject_id')] = [
            item.findtext('yob'),
            item.findtext('mrn'),
            item.findtext('facility_code')]

    # iterate through the smi data and generate new merged xml's for
    # subject_map and subject_map_exceptions
    subjectmap_root = etree.Element("subject_map_records")
    subjectmap_exceptions_root = etree.Element("subject_map_exception_records")
    exceptions = False

    # Snapshot the items first: matched items are appended to
    # subjectmap_root below, which presumably (lxml) re-parents them out of
    # the tree being iterated.
    for item in list(smi_data.iter('item')):
        subject_id = item.findtext('research_subject_id')
        if subject_id not in person_index_dict:
            continue
        logger.debug("Processing research_subject_id %s", subject_id)
        index_yob, index_mrn, index_facility = person_index_dict[subject_id]
        item_yob = item.findtext('yob')

        if index_yob == item_yob:
            logger.debug("yob matched for research_subject_id %s", subject_id)
            mrn = etree.SubElement(item, "mrn")
            mrn.text = index_mrn
            facility_code = etree.SubElement(item, "facility_code")
            facility_code.text = index_facility
            # Both sides may lack a yob (None == None); only remove the
            # element when it actually exists.
            yob_element = item.find('yob')
            if yob_element is not None:
                item.remove(yob_element)
            subjectmap_root.append(item)
        else:
            logger.debug("yob not matched for research_subject_id %s",
                         subject_id)
            exception_item = etree.Element("item")
            research_subject_id = etree.SubElement(exception_item,
                                                   "research_subject_id")
            research_subject_id.text = subject_id
            if research_subject_id.text is not None:
                exceptions = True
            pi_yob = etree.SubElement(exception_item, "Person_Index_YOB")
            pi_yob.text = index_yob
            hcvt_yob = etree.SubElement(exception_item, "HCVTarget_YOB")
            hcvt_yob.text = item_yob
            subjectmap_exceptions_root.append(exception_item)

    # Below code writes the merged records to csv files
    tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)
    subject_map_file = tmp_folder + "subject_map.csv"
    logger.info('Using path subject map file path: ' + subject_map_file)

    try:
        # The context manager closes the file even if a write fails.
        with open(subject_map_file, "w") as subject_map_csv:
            subject_map_csv.write(
                '"research_subject_id","start_date","end_date","mrn","facility_code"\n')
            for item in subjectmap_root.iter("item"):
                line = '"{0}","{1}","{2}","{3}","{4}"\n'.format(
                    gsm_lib.handle_blanks(item.findtext("research_subject_id")),
                    gsm_lib.handle_blanks(item.findtext("start_date")),
                    gsm_lib.handle_blanks(item.findtext("end_date")),
                    gsm_lib.handle_blanks(item.findtext("mrn")),
                    gsm_lib.handle_blanks(item.findtext("facility_code")))
                subject_map_csv.write(line)
    except IOError:
        logger.exception("Could not open file %s for write", subject_map_file)
        raise

    # remove the smi.xml from the folder because the XSLT process
    # writes data to smi.xml
    try:
        os.remove(smi_path)
    except OSError:
        logger.exception("Could not remove file %s ", smi_path)
        raise

    # send the subject_map.csv to EMR team (sftp server)
    parse_site_details_and_send(site_catalog_file, subject_map_file, 'sftp',
                                settings, logger)
    if do_keep_gen_files:
        logger.info('Keeping the temporary file: ' + subject_map_file)
    else:
        logger.info('Removing the temporary file: ' + subject_map_file)
        os.remove(subject_map_file)

    # send subject_map_exceptions.csv as email attachment
    if exceptions:
        subject_map_exceptions_file = tmp_folder + 'subject_map_exceptions.csv'
        try:
            subject_map_exceptions_csv = open(subject_map_exceptions_file, "w")
        except IOError:
            logger.exception("Could not open file %s for write",
                             subject_map_exceptions_file)
            raise

        with subject_map_exceptions_csv:
            subject_map_exceptions_csv.write(
                '"research_subject_id","person_index_yob","redcap_yob"\n')
            for item in subjectmap_exceptions_root.iter("item"):
                line = '"{0}","{1}","{2}"\n'.format(
                    gsm_lib.handle_blanks(item.find("research_subject_id").text),
                    gsm_lib.handle_blanks(item.find("Person_Index_YOB").text),
                    gsm_lib.handle_blanks(item.find("HCVTarget_YOB").text))
                subject_map_exceptions_csv.write(line)

        parse_site_details_and_send(site_catalog_file,
                                    subject_map_exceptions_file, 'email',
                                    settings, logger)
        if do_keep_gen_files:
            logger.info('Keeping the temporary file: '
                        + subject_map_exceptions_file)
        else:
            logger.info('Removing the temporary file: '
                        + subject_map_exceptions_file)
            os.remove(subject_map_exceptions_file)
def main():
    """Generate the per-site smi files from REDCap data and send them.

    Parses the ``-c`` (configuration directory) and ``-k/--keep`` command
    line options, loads ``setup.json``, configures logging, fetches the
    REDCap data, runs it through three XSL transformations, moves every
    ``start_date`` back by 180 days, writes one ``smi<id>.xml`` file per
    top-level element of the result and passes the file names and ids to
    ``parse_site_details_and_send``.

    Sets the module-level ``configuration_directory``, ``do_keep_gen_files``,
    ``tmp_folder``, ``setup`` and ``gsmlogger``.
    """
    global configuration_directory
    global do_keep_gen_files
    global tmp_folder
    global setup
    global gsmlogger

    # obtaining command line arguments for path to config directory
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-c',
        dest='configuration_directory_path',
        default=default_configuration_directory,
        required=False,
        help='Specify the path to the configuration directory')

    # read the optional argument `-k` for keeping the generated files
    parser.add_argument(
        '-k', '--keep',
        default=default_do_keep_gen_files,
        required=False,
        help='Specify `yes` to preserve the files generated during execution')

    args = vars(parser.parse_args())
    configuration_directory = args['configuration_directory_path'] + '/'
    # Any value other than None for --keep means "keep the files".
    do_keep_gen_files = args['keep'] is not None

    setup = gsm_lib.read_config(configuration_directory, 'setup.json')
    site_catalog_file = configuration_directory + setup['site_catalog']
    system_log_file = setup['system_log_file']

    # Configure logging
    gsmlogger = GSMLogger()
    gsmlogger.configure_logging(system_log_file)

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory
    properties = rt.init_redcap_interface(setup, gsmlogger.logger)
    transform_xsl = configuration_directory + setup['xml_formatting_tranform_xsl']

    # get data from the redcap for the fields listed in the
    # source_data_schema.xml
    response = rt.get_data_from_redcap(properties, gsmlogger.logger)

    # XSL Transformation 1: This transformation removes junk data, rename
    # elements and extracts site_id and adds new tag site_id
    xml_tree = etree.fromstring(response)
    xslt = etree.parse(transform_xsl)
    transform = etree.XSLT(xslt)
    xml_transformed = transform(xml_tree)

    # XSL Transformation 2: This transformation groups the data based on
    # site_id
    transform2_xsl = proj_root + 'bin/utils/groupby_siteid_transform.xsl'
    xslt = etree.parse(transform2_xsl)
    transform = etree.XSLT(xslt)
    xml_transformed2 = transform(xml_transformed)

    # XSL Transformation 3: This transformation removes all the nodes which
    # are not set
    transform3_xsl = proj_root + 'bin/utils/remove_junktags_transform.xsl'
    xslt = etree.parse(transform3_xsl)
    transform = etree.XSLT(xslt)
    xml_transformed3 = transform(xml_transformed2)

    # Prettifying the output generated by XSL Transformation
    xml_str2 = etree.tostring(xml_transformed3, method='xml',
                              pretty_print=True)
    tree = etree.fromstring(xml_str2, etree.XMLParser(remove_blank_text=True))

    # Loop through the start_date elements and update their values
    for k in tree.iter('start_date'):
        d = datetime.datetime.strptime(k.text, "%Y-%m-%d").date() \
            - timedelta(days=180)
        k.text = str(d)

    # writing data to smi+site_id.xml. This xml will be saved to sftp of the
    # site as smi.xml
    smi_filenames = []
    smi_ids = []
    tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)
    for k in tree:
        site_id = k.attrib['id']
        file_name = tmp_folder + 'smi' + site_id + '.xml'
        gsm_lib.write_element_tree_to_file(ET.ElementTree(k), file_name)
        smi_filenames.append(file_name)
        smi_ids.append(site_id)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Using smi_filenames: ')
    pprint.pprint(smi_filenames)
    parse_site_details_and_send(site_catalog_file, smi_filenames, smi_ids,
                                gsmlogger)
def main():
    """Build the per-site smi XML files from REDCap data and dispatch them.

    Steps: read the command line and ``settings.ini``; verify that the
    ``xml_formatting_transform_xsl`` setting exists, is non-empty and points
    at an existing file (otherwise log the problem and raise
    ``gsm_lib.ConfigurationError``); fetch the REDCap data; apply the
    configured stylesheet followed by the two packaged stylesheets; move each
    ``start_date`` back by 365 days; write one ``smi<site_code>.xml`` per
    top-level element; call ``parse_site_details_and_send``.
    """
    # obtaining command line arguments for path to config directory
    args = parse_args()
    config_dir_arg = args['configuration_directory_path']
    configuration_directory = os.path.abspath(config_dir_arg)

    # Configure logging
    logger = configure_logging(args['verbose'], args['logfile'])

    conf_file = os.path.join(configuration_directory, 'settings.ini')
    settings = gsm_lib.get_settings(conf_file)
    gsm_lib.read_config(configuration_directory, conf_file, settings)

    # Check if xml_formatting_transform.xsl file is present/properly set in
    # setting.ini
    advice = (
        "Please set it with appropriate value and restart execution. "
        "For assistance refer config-example-gsm-input/settings.ini."
        "\nProgram will now terminate..."
    )

    if not settings.hasoption('xml_formatting_transform_xsl'):
        problem = (
            "Required parameter xml_formatting_transform_xsl is missing "
            "in settings.ini. "
        ) + advice
        logger.error(problem)
        raise gsm_lib.ConfigurationError(problem)

    if settings.xml_formatting_transform_xsl == "":
        problem = (
            "Required parameter xml_formatting_transform_xsl does not "
            "have a value in settings.ini. "
        ) + advice
        logger.error(problem)
        raise gsm_lib.ConfigurationError(problem)

    formatting_stylesheet = os.path.join(
        configuration_directory, settings.xml_formatting_transform_xsl)
    if not os.path.exists(formatting_stylesheet):
        template = (
            "Required file xml_formatting_transform.xsl does not exist "
            "in {0}. Please make sure this file is included in the "
            "configuration directory and restart execution. For "
            "assistance refer config-example-gsm-input/xml_formatting_transform.xsl."
            "\nProgram will now terminate..."
        )
        problem = template.format(configuration_directory)
        logger.error(problem)
        raise gsm_lib.ConfigurationError(problem)

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory
    properties = rt.init_redcap_interface(settings, logger)

    # get data from the redcap for the fields listed in the
    # source_data_schema.xml
    response = rt.get_data_from_redcap(properties, logger)
    logger.debug(response)

    # XSL Transformation 1: removes junk data, renames elements, extracts
    # site_id and adds the new tag site_id
    document = etree.XSLT(etree.parse(formatting_stylesheet))(
        etree.fromstring(response))
    # The serialized form is not used further below.
    xml_str = etree.tostring(document, method='xml', pretty_print=True)

    # XSL Transformation 2 groups the data based on site_id;
    # XSL Transformation 3 removes all the nodes which are not set.
    packaged_stylesheets = (
        'bin/utils/groupby_siteid_transform.xsl',
        'bin/utils/remove_junktags_transform.xsl',
    )
    for relative_path in packaged_stylesheets:
        stage = etree.XSLT(etree.parse(proj_root + relative_path))
        document = stage(document)

    # Prettifying the output generated by XSL Transformation
    pretty_xml = etree.tostring(document, method='xml', pretty_print=True)
    tree = etree.fromstring(pretty_xml,
                            etree.XMLParser(remove_blank_text=True))

    # Loop through the start_date elements and update their values
    one_year = timedelta(days=365)
    for start_node in tree.iter('start_date'):
        parsed = datetime.datetime.strptime(start_node.text, "%Y-%m-%d")
        start_node.text = str(parsed.date() - one_year)

    # writing data to smi+site_code.xml. This xml will be saved to sftp of
    # the site as smi.xml
    do_keep_gen_files = args['keep']
    tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)
    subject_map_input = {}
    for site_node in tree:
        site_code = site_node.attrib['id']
        target = tmp_folder + 'smi' + site_code + '.xml'
        gsm_lib.write_element_tree_to_file(ET.ElementTree(site_node), target)
        subject_map_input[site_code] = target

    site_catalog_file = os.path.join(configuration_directory,
                                     settings.site_catalog)
    parse_site_details_and_send(site_catalog_file, subject_map_input, logger,
                                settings, do_keep_gen_files)
def main():
    """Merge the person index with smi.xml and deliver the subject map.

    Parses the ``-c`` (configuration directory) and ``-k/--keep`` command
    line options, loads ``setup.json``, configures logging, fetches the
    person index from REDCap and transforms it with
    ``person_index_transform.xsl``, obtains the smi file path from
    ``get_smi_and_parse`` and joins the two data sets on
    ``research_subject_id``:

    * items whose ``yob`` equals the person index ``yob`` get ``mrn`` and
      ``facility_code`` added, lose their ``yob`` element and are written to
      ``subject_map.csv``, which is sent with the ``"sftp"`` option;
    * items whose ``yob`` differs are written to
      ``subject_map_exceptions.csv``, which is sent with the ``"email"``
      option.

    The generated CSV files are removed afterwards unless ``--keep`` was
    given.  Sets the module-level ``configuration_directory``,
    ``do_keep_gen_files``, ``setup`` and ``gsmlogger``.

    Failures to find the smi file, open a CSV file or remove the smi file
    are raised through ``GSMLogger().LogException``.
    """
    global configuration_directory
    global do_keep_gen_files
    global setup
    global gsmlogger

    # obtaining command line arguments for path to config directory
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        dest="configuration_directory_path",
        default=default_configuration_directory,
        required=False,
        help="Specify the path to the configuration directory",
    )
    # read the optional argument `-k` for keeping the generated files
    parser.add_argument(
        "-k",
        "--keep",
        default=default_do_keep_gen_files,
        required=False,
        help="Specify `yes` to preserve the files generated during execution",
    )
    args = vars(parser.parse_args())
    configuration_directory = args["configuration_directory_path"] + "/"
    # Any value other than None for --keep means "keep the files".
    do_keep_gen_files = args["keep"] is not None

    # read setup options
    setup = gsm_lib.read_config(configuration_directory, "setup.json")
    site_catalog_file = configuration_directory + setup["site_catalog"]
    system_log_file = setup["system_log_file"]

    # Configure logging
    gsmlogger = GSMLogger()
    gsmlogger.configure_logging(system_log_file)
    logger = gsmlogger.logger

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory
    properties = rt.init_redcap_interface(setup, logger)

    # gets data from the person index for the fields listed in the
    # source_data_schema.xml
    response = rt.get_data_from_redcap(properties, logger)
    xml_tree = etree.fromstring(response)

    # XSL Transformation : transforms the person_index data
    transform_xsl = proj_root + "bin/utils/person_index_transform.xsl"
    xslt = etree.parse(transform_xsl)
    transform = etree.XSLT(xslt)
    person_index_data = transform(xml_tree)

    # retrieve smi.xml from the sftp server
    smi_path = get_smi_and_parse(site_catalog_file)
    if not os.path.exists(smi_path):
        raise GSMLogger().LogException("Error: file " + smi_path + " not found")

    # Below code merges the 2 xmls.  etree.parse() reads the file itself, so
    # no separate file handle is opened (the file is deleted further down).
    smi_data = etree.parse(smi_path)

    # sorting both the xml files.
    gsm_lib.sort_element_tree(smi_data)
    gsm_lib.sort_element_tree(person_index_data)

    # generating the person index dictionary:
    # research_subject_id -> [yob, mrn, facility_code]
    person_index_dict = {}
    for item in person_index_data.iter("item"):
        person_index_dict[item.findtext("research_subject_id")] = [
            item.findtext("yob"),
            item.findtext("mrn"),
            item.findtext("facility_code"),
        ]

    # iterate through the smi data and generate new merged xml's for
    # subject_map and subject_map_exceptions
    subjectmap_root = etree.Element("subject_map_records")
    subjectmap_exceptions_root = etree.Element("subject_map_exception_records")
    exceptions = False

    # Snapshot the items first: matched items are appended to
    # subjectmap_root below, which presumably (lxml) re-parents them out of
    # the tree being iterated.
    for item in list(smi_data.iter("item")):
        subject_id = item.findtext("research_subject_id")
        if subject_id not in person_index_dict:
            continue
        logger.debug("Processing research_subject_id %s", subject_id)
        index_yob, index_mrn, index_facility = person_index_dict[subject_id]
        item_yob = item.findtext("yob")

        if index_yob == item_yob:
            logger.debug("yob matched for research_subject_id %s", subject_id)
            mrn = etree.SubElement(item, "mrn")
            mrn.text = index_mrn
            facility_code = etree.SubElement(item, "facility_code")
            facility_code.text = index_facility
            # Both sides may lack a yob (None == None); only remove the
            # element when it actually exists.
            yob_element = item.find("yob")
            if yob_element is not None:
                item.remove(yob_element)
            subjectmap_root.append(item)
        else:
            logger.debug("yob not matched for research_subject_id %s",
                         subject_id)
            exception_item = etree.Element("item")
            research_subject_id = etree.SubElement(exception_item,
                                                   "research_subject_id")
            research_subject_id.text = subject_id
            if research_subject_id.text is not None:
                exceptions = True
            pi_yob = etree.SubElement(exception_item, "Person_Index_YOB")
            pi_yob.text = index_yob
            hcvt_yob = etree.SubElement(exception_item, "HCVTarget_YOB")
            hcvt_yob.text = item_yob
            subjectmap_exceptions_root.append(exception_item)

    # Below code writes the merged records to csv files
    tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)
    subject_map_file = tmp_folder + "subject_map.csv"
    logger.info("Using path subject map file path: " + subject_map_file)

    try:
        # The context manager closes the file even if a write fails.
        with open(subject_map_file, "w") as subject_map_csv:
            subject_map_csv.write(
                '"research_subject_id","start_date","end_date","mrn","facility_code"\n')
            for item in subjectmap_root.iter("item"):
                line = '"{0}","{1}","{2}","{3}","{4}"\n'.format(
                    gsm_lib.handle_blanks(item.findtext("research_subject_id")),
                    gsm_lib.handle_blanks(item.findtext("start_date")),
                    gsm_lib.handle_blanks(item.findtext("end_date")),
                    gsm_lib.handle_blanks(item.findtext("mrn")),
                    gsm_lib.handle_blanks(item.findtext("facility_code")),
                )
                subject_map_csv.write(line)
    except IOError:
        raise GSMLogger().LogException("Could not open file %s for write",
                                       subject_map_file)

    # remove the smi.xml from the folder because the XSLT process
    # writes data to smi.xml
    try:
        os.remove(smi_path)
    except OSError:
        raise GSMLogger().LogException("Could not remove file %s ", smi_path)

    # send the subject_map.csv to EMR team (sftp server)
    parse_site_details_and_send(site_catalog_file, subject_map_file, "sftp")

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    if do_keep_gen_files:
        print(" * Keeping the temporary file: " + subject_map_file)
    else:
        print(" * Removing the temporary file: " + subject_map_file)
        os.remove(subject_map_file)

    # send subject_map_exceptions.csv as email attachment
    if exceptions:
        subject_map_exceptions_file = tmp_folder + "subject_map_exceptions.csv"
        try:
            subject_map_exceptions_csv = open(subject_map_exceptions_file, "w")
        except IOError:
            raise GSMLogger().LogException("Could not open file %s for write",
                                           subject_map_exceptions_file)

        with subject_map_exceptions_csv:
            subject_map_exceptions_csv.write(
                '"research_subject_id","person_index_yob","redcap_yob"\n')
            for item in subjectmap_exceptions_root.iter("item"):
                line = '"{0}","{1}","{2}"\n'.format(
                    gsm_lib.handle_blanks(item.find("research_subject_id").text),
                    gsm_lib.handle_blanks(item.find("Person_Index_YOB").text),
                    gsm_lib.handle_blanks(item.find("HCVTarget_YOB").text),
                )
                subject_map_exceptions_csv.write(line)

        parse_site_details_and_send(site_catalog_file,
                                    subject_map_exceptions_file, "email")
        if do_keep_gen_files:
            print(" * Keeping the temporary file: "
                  + subject_map_exceptions_file)
        else:
            print(" * Removing the temporary file: "
                  + subject_map_exceptions_file)
            os.remove(subject_map_exceptions_file)