Ejemplo n.º 1
0
def main():
    """Generate the subject map CSV (and, if needed, the exceptions CSV).

    Workflow, in order:

    1. Read the command-line arguments, configure logging and load
       ``settings.ini`` from the configuration directory.
    2. Pull the person-index records through ``redcap_transactions`` and
       reshape them with ``person_index_transform.xsl``.
    3. Obtain ``smi.xml`` through ``get_smi_and_parse`` and parse it.
    4. For every smi ``item`` whose ``research_subject_id`` is present in the
       person index, compare the year of birth (``yob``):
       * equal   -> the item gains ``mrn`` and ``facility_code``, loses
         ``yob`` and is moved under ``subject_map_records``;
       * differs -> an entry holding both ``yob`` values is added under
         ``subject_map_exception_records``.
    5. Write ``subject_map.csv``, delete ``smi.xml`` and hand the CSV to
       ``parse_site_details_and_send`` in ``'sftp'`` mode.
    6. If at least one mismatch with a non-empty id was recorded, write
       ``subject_map_exceptions.csv`` and hand it over in ``'email'`` mode.

    Generated CSV files are deleted after sending unless the ``keep``
    argument is truthy.

    Side effects:
        Rebinds the module-level ``configuration_directory``.

    Raises:
        IOError: if ``smi.xml`` cannot be parsed or a CSV file cannot be
            opened or written (logged, then re-raised).
        OSError: if ``smi.xml`` cannot be removed (logged, then re-raised).
    """
    global configuration_directory

    # obtaining command line arguments for path to config directory
    args = parse_args()
    configuration_directory = os.path.abspath(args['configuration_directory_path'])
    do_keep_gen_files = args['keep']

    # Configure logging
    logger = configure_logging(args['verbose'], args['logfile'])

    conf_file = os.path.join(configuration_directory, 'settings.ini')
    settings = gsm_lib.get_settings(conf_file)
    gsm_lib.read_config(configuration_directory, conf_file, settings)
    site_catalog_file = os.path.join(configuration_directory, settings.site_catalog)

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory

    properties = rt.init_redcap_interface(settings, logger)
    # gets data from the person index for the fields listed in the source_data_schema.xml
    response = rt.get_data_from_redcap(properties, logger)
    xml_tree = etree.fromstring(response)

    # XSL Transformation : transforms the person_index data
    transform_xsl = proj_root + "bin/utils/person_index_transform.xsl"
    xslt = etree.parse(transform_xsl)
    transform = etree.XSLT(xslt)
    person_index_data = transform(xml_tree)

    # retrieve smi.xml from the sftp server
    smi_path = get_smi_and_parse(site_catalog_file, settings, logger)
    try:
        smi_data = etree.parse(smi_path)
    except IOError:
        logger.exception("Could not open file: " + smi_path)
        raise

    # sorting both the xml files.
    gsm_lib.sort_element_tree(smi_data)
    gsm_lib.sort_element_tree(person_index_data)

    # Person index lookup: research_subject_id -> [yob, mrn, facility_code].
    # A later duplicate id overwrites an earlier one.
    person_index_dict = {}
    for item in person_index_data.iter('item'):
        person_index_dict[item.findtext('research_subject_id')] = [
            item.findtext('yob'),
            item.findtext('mrn'),
            item.findtext('facility_code')]

    # iterate through the smi data and generate the merged trees for
    # subject_map and subject_map_exceptions
    subjectmap_root = etree.Element("subject_map_records")
    subjectmap_exceptions_root = etree.Element("subject_map_exception_records")
    exceptions = False
    for item in smi_data.iter('item'):
        subject_id = item.findtext('research_subject_id')
        # Test membership on the dict itself: on Python 2 `.keys()` builds a
        # list, which made every lookup linear in the person index size.
        if subject_id not in person_index_dict:
            continue

        index_yob, index_mrn, index_facility_code = person_index_dict[subject_id]
        smi_yob = item.findtext('yob')
        logger.debug("Processing research_subject_id %s", subject_id)

        if index_yob == smi_yob:
            logger.debug("yob matched for research_subject_id %s", subject_id)
            mrn = etree.SubElement(item, "mrn")
            mrn.text = index_mrn
            facility_code = etree.SubElement(item, "facility_code")
            facility_code.text = index_facility_code
            # yob was only needed for the comparison; it is not exported.
            item.remove(item.find('yob'))
            subjectmap_root.append(item)
        else:
            logger.debug("yob not matched for research_subject_id %s", subject_id)
            exception_item = etree.Element("item")
            research_subject_id = etree.SubElement(exception_item, "research_subject_id")
            research_subject_id.text = subject_id
            # Only a mismatch that carries an id triggers the exceptions report.
            if research_subject_id.text is not None:
                exceptions = True
            pi_yob = etree.SubElement(exception_item, "Person_Index_YOB")
            pi_yob.text = index_yob
            hcvt_yob = etree.SubElement(exception_item, "HCVTarget_YOB")
            hcvt_yob.text = smi_yob
            subjectmap_exceptions_root.append(exception_item)

    # NOTE(review): this xml2csv transform is built but never applied below;
    # the CSV files are written by hand. Kept so a missing stylesheet still
    # fails here as before - confirm whether it can be dropped.
    transform_xsl = proj_root + "bin/utils/xml2csv.xsl"
    xslt = etree.parse(transform_xsl)
    transform = etree.XSLT(xslt)

    tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)
    subject_map_file = tmp_folder + "subject_map.csv"
    logger.info('Using path subject map file path: ' + subject_map_file)

    try:
        # `with` guarantees the handle is closed even when a write fails.
        with open(subject_map_file, "w") as subject_map_csv:
            subject_map_csv.write(
                '"research_subject_id","start_date","end_date","mrn","facility_code"\n')
            for item in subjectmap_root.iter("item"):
                subject_map_csv.write('"{0}","{1}","{2}","{3}","{4}"\n'.format(
                    gsm_lib.handle_blanks(item.findtext("research_subject_id")),
                    gsm_lib.handle_blanks(item.findtext("start_date")),
                    gsm_lib.handle_blanks(item.findtext("end_date")),
                    gsm_lib.handle_blanks(item.findtext("mrn")),
                    gsm_lib.handle_blanks(item.findtext("facility_code"))))
    except IOError:
        logger.exception("Could not open file %s for write", subject_map_file)
        raise

    # remove the smi.xml from the folder because the XSLT process
    # writes data to smi.xml
    try:
        os.remove(smi_path)
    except OSError:
        logger.exception("Could not remove file %s ", smi_path)
        raise

    # send the subject_map.csv to EMR team (sftp server)
    parse_site_details_and_send(site_catalog_file, subject_map_file, 'sftp', settings, logger)
    if do_keep_gen_files:
        logger.info('Keeping the temporary file: ' + subject_map_file)
    else:
        logger.info('Removing the temporary file: ' + subject_map_file)
        os.remove(subject_map_file)

    # send subject_map_exceptions.csv as email attachment
    if exceptions:
        subject_map_exceptions_file = tmp_folder + 'subject_map_exceptions.csv'
        try:
            subject_map_exceptions_csv = open(subject_map_exceptions_file, "w")
        except IOError:
            logger.exception("Could not open file %s for write", subject_map_exceptions_file)
            raise
        # Only the open() failure is logged above (as before); the `with`
        # block just makes sure the handle is released if a write fails.
        with subject_map_exceptions_csv:
            subject_map_exceptions_csv.write(
                '"research_subject_id","person_index_yob","redcap_yob"\n')
            for item in subjectmap_exceptions_root.iter("item"):
                subject_map_exceptions_csv.write('"{0}","{1}","{2}"\n'.format(
                    gsm_lib.handle_blanks(item.find("research_subject_id").text),
                    gsm_lib.handle_blanks(item.find("Person_Index_YOB").text),
                    gsm_lib.handle_blanks(item.find("HCVTarget_YOB").text)))

        parse_site_details_and_send(site_catalog_file, subject_map_exceptions_file, 'email', settings, logger)
        if do_keep_gen_files:
            logger.info('Keeping the temporary file: ' + subject_map_exceptions_file)
        else:
            logger.info('Removing the temporary file: ' + subject_map_exceptions_file)
            os.remove(subject_map_exceptions_file)
def main():
    """Generate the subject map CSV (and, if needed, the exceptions CSV).

    Workflow, in order:

    1. Parse ``-c`` (configuration directory) and ``-k/--keep`` from the
       command line, load ``setup.json`` and configure logging with the
       ``system_log_file`` named there.
    2. Pull the person-index records through ``redcap_transactions`` and
       reshape them with ``person_index_transform.xsl``.
    3. Obtain ``smi.xml`` through ``get_smi_and_parse`` and parse it.
    4. For every smi ``item`` whose ``research_subject_id`` is present in the
       person index, compare the year of birth (``yob``):
       * equal   -> the item gains ``mrn`` and ``facility_code``, loses
         ``yob`` and is moved under ``subject_map_records``;
       * differs -> an entry holding both ``yob`` values is added under
         ``subject_map_exception_records``.
    5. Write ``subject_map.csv``, delete ``smi.xml`` and hand the CSV to
       ``parse_site_details_and_send`` in ``"sftp"`` mode.
    6. If at least one mismatch with a non-empty id was recorded, write
       ``subject_map_exceptions.csv`` and hand it over in ``"email"`` mode.

    Generated CSV files are deleted after sending unless keeping was
    requested.

    Side effects:
        Rebinds the module-level ``configuration_directory``,
        ``do_keep_gen_files``, ``setup`` and ``gsmlogger``.

    Raises:
        Whatever ``GSMLogger().LogException(...)`` produces when ``smi.xml``
        is missing, a CSV file cannot be opened/written, or ``smi.xml``
        cannot be removed.
    """
    global configuration_directory
    global do_keep_gen_files
    global setup
    global gsmlogger

    # obtaining command line arguments for path to config directory
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        dest="configuration_directory_path",
        default=default_configuration_directory,
        required=False,
        help="Specify the path to the configuration directory",
    )

    # read the optional argument `-k` for keeping the generated files
    parser.add_argument(
        "-k",
        "--keep",
        default=default_do_keep_gen_files,
        required=False,
        help="Specify `yes` to preserve the files generated during execution",
    )

    args = vars(parser.parse_args())
    configuration_directory = args["configuration_directory_path"] + "/"
    # Any value other than None (supplied or default) enables keeping.
    do_keep_gen_files = args["keep"] is not None

    # read setup options
    setup = gsm_lib.read_config(configuration_directory, "setup.json")
    site_catalog_file = configuration_directory + setup["site_catalog"]
    system_log_file = setup["system_log_file"]

    # Configure logging
    gsmlogger = GSMLogger()
    gsmlogger.configure_logging(system_log_file)
    logger = gsmlogger.logger

    # Initialize Redcap Interface
    rt = redcap_transactions()
    rt.configuration_directory = configuration_directory

    properties = rt.init_redcap_interface(setup, logger)
    # gets data from the person index for the fields listed in the source_data_schema.xml
    response = rt.get_data_from_redcap(properties, logger)
    xml_tree = etree.fromstring(response)

    # XSL Transformation : transforms the person_index data
    transform_xsl = proj_root + "bin/utils/person_index_transform.xsl"
    xslt = etree.parse(transform_xsl)
    transform = etree.XSLT(xslt)
    person_index_data = transform(xml_tree)

    # retrieve smi.xml from the sftp server
    smi_path = get_smi_and_parse(site_catalog_file)
    if not os.path.exists(smi_path):
        raise GSMLogger().LogException("Error: file " + smi_path + " not found")
    # The file is parsed straight from its path. The previous extra
    # `open(smi_path)` handle was never read or closed and stayed open while
    # the file was removed further down.
    smi_data = etree.parse(smi_path)

    # sorting both the xml files.
    gsm_lib.sort_element_tree(smi_data)
    gsm_lib.sort_element_tree(person_index_data)

    # Person index lookup: research_subject_id -> [yob, mrn, facility_code].
    # A later duplicate id overwrites an earlier one.
    person_index_dict = {}
    for item in person_index_data.iter("item"):
        person_index_dict[item.findtext("research_subject_id")] = [
            item.findtext("yob"),
            item.findtext("mrn"),
            item.findtext("facility_code"),
        ]

    # iterate through the smi data and generate the merged trees for
    # subject_map and subject_map_exceptions
    subjectmap_root = etree.Element("subject_map_records")
    subjectmap_exceptions_root = etree.Element("subject_map_exception_records")
    exceptions = False
    for item in smi_data.iter("item"):
        subject_id = item.findtext("research_subject_id")
        # Test membership on the dict itself: on Python 2 `.keys()` builds a
        # list, which made every lookup linear in the person index size.
        if subject_id not in person_index_dict:
            continue

        index_yob, index_mrn, index_facility_code = person_index_dict[subject_id]
        smi_yob = item.findtext("yob")
        logger.debug("Processing research_subject_id %s", subject_id)

        if index_yob == smi_yob:
            logger.debug("yob matched for research_subject_id %s", subject_id)
            mrn = etree.SubElement(item, "mrn")
            mrn.text = index_mrn
            facility_code = etree.SubElement(item, "facility_code")
            facility_code.text = index_facility_code
            # yob was only needed for the comparison; it is not exported.
            item.remove(item.find("yob"))
            subjectmap_root.append(item)
        else:
            logger.debug("yob not matched for research_subject_id %s", subject_id)
            exception_item = etree.Element("item")
            research_subject_id = etree.SubElement(exception_item, "research_subject_id")
            research_subject_id.text = subject_id
            # Only a mismatch that carries an id triggers the exceptions report.
            if research_subject_id.text is not None:
                exceptions = True
            pi_yob = etree.SubElement(exception_item, "Person_Index_YOB")
            pi_yob.text = index_yob
            hcvt_yob = etree.SubElement(exception_item, "HCVTarget_YOB")
            hcvt_yob.text = smi_yob
            subjectmap_exceptions_root.append(exception_item)

    # NOTE(review): this xml2csv transform is built but never applied below;
    # the CSV files are written by hand. Kept so a missing stylesheet still
    # fails here as before - confirm whether it can be dropped.
    transform_xsl = proj_root + "bin/utils/xml2csv.xsl"
    xslt = etree.parse(transform_xsl)
    transform = etree.XSLT(xslt)

    tmp_folder = gsm_lib.get_temp_path(do_keep_gen_files)
    subject_map_file = tmp_folder + "subject_map.csv"
    logger.info("Using path subject map file path: " + subject_map_file)

    try:
        # `with` guarantees the handle is closed even when a write fails.
        with open(subject_map_file, "w") as subject_map_csv:
            subject_map_csv.write(
                '"research_subject_id","start_date","end_date","mrn","facility_code"\n'
            )
            for item in subjectmap_root.iter("item"):
                subject_map_csv.write(
                    '"{0}","{1}","{2}","{3}","{4}"\n'.format(
                        gsm_lib.handle_blanks(item.findtext("research_subject_id")),
                        gsm_lib.handle_blanks(item.findtext("start_date")),
                        gsm_lib.handle_blanks(item.findtext("end_date")),
                        gsm_lib.handle_blanks(item.findtext("mrn")),
                        gsm_lib.handle_blanks(item.findtext("facility_code")),
                    )
                )
    except IOError:
        raise GSMLogger().LogException("Could not open file %s for write", subject_map_file)

    # remove the smi.xml from the folder because the XSLT process
    # writes data to smi.xml
    try:
        os.remove(smi_path)
    except OSError:
        raise GSMLogger().LogException("Could not remove file %s ", smi_path)

    # send the subject_map.csv to EMR team (sftp server)
    parse_site_details_and_send(site_catalog_file, subject_map_file, "sftp")
    # Single-argument parenthesised print: identical output under the
    # Python 2 print statement and valid syntax on Python 3.
    if do_keep_gen_files:
        print(" * Keeping the temporary file: " + subject_map_file)
    else:
        print(" * Removing the temporary file: " + subject_map_file)
        os.remove(subject_map_file)

    # send subject_map_exceptions.csv as email attachment
    if exceptions:
        subject_map_exceptions_file = tmp_folder + "subject_map_exceptions.csv"
        try:
            subject_map_exceptions_csv = open(subject_map_exceptions_file, "w")
        except IOError:
            raise GSMLogger().LogException("Could not open file %s for write", subject_map_exceptions_file)

        # Only the open() failure is translated above (as before); the `with`
        # block just makes sure the handle is released if a write fails.
        with subject_map_exceptions_csv:
            subject_map_exceptions_csv.write(
                '"research_subject_id","person_index_yob","redcap_yob"\n'
            )
            for item in subjectmap_exceptions_root.iter("item"):
                subject_map_exceptions_csv.write(
                    '"{0}","{1}","{2}"\n'.format(
                        gsm_lib.handle_blanks(item.find("research_subject_id").text),
                        gsm_lib.handle_blanks(item.find("Person_Index_YOB").text),
                        gsm_lib.handle_blanks(item.find("HCVTarget_YOB").text),
                    )
                )

        parse_site_details_and_send(site_catalog_file, subject_map_exceptions_file, "email")
        if do_keep_gen_files:
            print(" * Keeping the temporary file: " + subject_map_exceptions_file)
        else:
            print(" * Removing the temporary file: " + subject_map_exceptions_file)
            os.remove(subject_map_exceptions_file)