Python process_analysis 예제들, regulation.changes.process_analysis Python 예제들

예제 #1

0

파일 보기

    def test_process_analysis_no_existing(self):
        """Notice analysis is added to a regulation that has none.

        The regulation fixture holds only an empty ``part``; the notice
        fixture carries an ``analysis`` element with two
        ``analysisSection`` children. The processed result is expected to
        contain an ``analysis`` element holding both sections.
        """
        notice_xml = etree.fromstring("""
            <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <fdsys>
                <date>2015-11-17</date>
              </fdsys>
              <preamble>
                <documentNumber>2015-12345</documentNumber>
              </preamble>
              <changeset></changeset>
              <analysis label="1234-Analysis">
                <analysisSection target="1234-2" notice="2015-12345" date="2015-11-17">An existing analysis</analysisSection>
                <analysisSection target="1234-3" notice="2015-12345" date="2015-11-17">An unchanging analysis</analysisSection>
              </analysis>
            </notice>""")
        regulation_xml = etree.fromstring("""
            <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <part label="1234"></part>
            </regulation>""")

        result = process_analysis(regulation_xml, notice_xml)

        # An analysis element must have been created somewhere in the tree.
        analysis = result.find('.//{eregs}analysis')
        self.assertIsNotNone(analysis)

        # Both notice sections must be direct children of that element.
        sections = analysis.findall('{eregs}analysisSection')
        self.assertEqual(len(sections), 2)

예제 #2

0

파일 보기

파일: regulation_changes_tests.py 프로젝트: willbarton/regulations-xml-parser

    def test_process_analysis_no_existing(self):
        """Check that analysis is created when the regulation has none.

        The notice fixture supplies two ``analysisSection`` elements and
        the regulation fixture contains no ``analysis`` element at all.
        After processing, the result must contain an ``analysis`` element
        with exactly those two sections as children.
        """
        notice_xml = etree.fromstring("""
            <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <fdsys>
                <date>2015-11-17</date>
              </fdsys>
              <preamble>
                <documentNumber>2015-12345</documentNumber>
              </preamble>
              <changeset></changeset>
              <analysis label="1234-Analysis">
                <analysisSection target="1234-2" notice="2015-12345" date="2015-11-17">An existing analysis</analysisSection>
                <analysisSection target="1234-3" notice="2015-12345" date="2015-11-17">An unchanging analysis</analysisSection>
              </analysis>
            </notice>""")
        regulation_xml = etree.fromstring("""
            <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <part label="1234"></part>
            </regulation>""")

        result = process_analysis(regulation_xml, notice_xml)

        # The regulation started without analysis, so finding one here
        # shows it was added during processing.
        analysis = result.find('.//{eregs}analysis')
        self.assertIsNotNone(analysis)

        sections = analysis.findall('{eregs}analysisSection')
        self.assertEqual(len(sections), 2)

예제 #3

0

파일 보기

    def test_process_analysis(self):
        """Notice analysis is merged with a regulation's existing analysis.

        The regulation fixture already has sections targeting ``1234-2``
        and ``1234-3``; the notice fixture adds sections targeting
        ``1234-1`` and ``1234-2``. The result is expected to hold all four
        sections, with both the earlier and the later ``1234-2`` section
        kept, in that order.
        """
        notice_xml = etree.fromstring("""
            <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <fdsys>
                <date>2014-11-17</date>
              </fdsys>
              <preamble>
                <documentNumber>2015-12345</documentNumber>
              </preamble>
              <changeset></changeset>
              <analysis label="1234-Analysis">
                <analysisSection target="1234-1" notice="2015-12345" date="2015-11-17">An added analysis</analysisSection>
                <analysisSection target="1234-2" notice="2015-12345" date="2015-11-17">An updated analysis</analysisSection>
              </analysis>
            </notice>""")
        regulation_xml = etree.fromstring("""
            <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <part label="1234"></part>
              <analysis label="1234-Analysis">
                <analysisSection target="1234-2" notice="2014-12345" date="2014-11-17">An existing analysis</analysisSection>
                <analysisSection target="1234-3" notice="2014-12345" date="2014-11-17">An unchanging analysis</analysisSection>
              </analysis>
            </regulation>""")

        result = process_analysis(regulation_xml, notice_xml)

        # Two pre-existing sections plus two from the notice.
        sections = result.findall('.//{eregs}analysisSection')
        self.assertEqual(len(sections), 4)

        # 1234-1 comes only from the notice; 1234-3 only from the
        # regulation, so each keeps the attributes of its source.
        first_analysis = result.find(
            './/{eregs}analysisSection[@target="1234-1"]')
        third_analysis = result.find(
            './/{eregs}analysisSection[@target="1234-3"]')
        self.assertEqual(first_analysis.get('notice'), '2015-12345')
        self.assertEqual(third_analysis.get('notice'), '2014-12345')
        self.assertEqual(first_analysis.get('date'), '2015-11-17')
        self.assertEqual(third_analysis.get('date'), '2014-11-17')

        # 1234-2 exists in both inputs: both versions are kept, with the
        # regulation's section ahead of the notice's.
        second_analysis = result.findall(
            './/{eregs}analysisSection[@target="1234-2"]')
        self.assertEqual(len(second_analysis), 2)
        self.assertEqual(second_analysis[0].get('date'), '2014-11-17')
        self.assertEqual(second_analysis[0].get('notice'), '2014-12345')
        self.assertEqual(second_analysis[1].get('date'), '2015-11-17')
        self.assertEqual(second_analysis[1].get('notice'), '2015-12345')

예제 #4

0

파일 보기

파일: regulation_changes_tests.py 프로젝트: willbarton/regulations-xml-parser

    def test_process_analysis(self):
        """Check merging of notice analysis into existing analysis.

        Fixtures: the regulation has analysis sections for ``1234-2`` and
        ``1234-3``; the notice has sections for ``1234-1`` and ``1234-2``.
        Expected outcome: four sections in total, where ``1234-2`` appears
        twice (the regulation's section first, then the notice's).
        """
        notice_xml = etree.fromstring("""
            <notice xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <fdsys>
                <date>2014-11-17</date>
              </fdsys>
              <preamble>
                <documentNumber>2015-12345</documentNumber>
              </preamble>
              <changeset></changeset>
              <analysis label="1234-Analysis">
                <analysisSection target="1234-1" notice="2015-12345" date="2015-11-17">An added analysis</analysisSection>
                <analysisSection target="1234-2" notice="2015-12345" date="2015-11-17">An updated analysis</analysisSection>
              </analysis>
            </notice>""")
        regulation_xml = etree.fromstring("""
            <regulation xmlns="eregs" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="eregs ../../eregs.xsd">
              <part label="1234"></part>
              <analysis label="1234-Analysis">
                <analysisSection target="1234-2" notice="2014-12345" date="2014-11-17">An existing analysis</analysisSection>
                <analysisSection target="1234-3" notice="2014-12345" date="2014-11-17">An unchanging analysis</analysisSection>
              </analysis>
            </regulation>""")

        result = process_analysis(regulation_xml, notice_xml)

        sections = result.findall('.//{eregs}analysisSection')
        self.assertEqual(len(sections), 4)

        # Sections that exist in only one input keep that input's
        # notice number and date.
        first_analysis = result.find('.//{eregs}analysisSection[@target="1234-1"]')
        third_analysis = result.find('.//{eregs}analysisSection[@target="1234-3"]')
        self.assertEqual(first_analysis.get('notice'), '2015-12345')
        self.assertEqual(third_analysis.get('notice'), '2014-12345')
        self.assertEqual(first_analysis.get('date'), '2015-11-17')
        self.assertEqual(third_analysis.get('date'), '2014-11-17')

        # The target present in both inputs yields two sections, ordered
        # regulation first, notice second.
        second_analysis = result.findall('.//{eregs}analysisSection[@target="1234-2"]')
        self.assertEqual(len(second_analysis), 2)
        self.assertEqual(second_analysis[0].get('date'), '2014-11-17')
        self.assertEqual(second_analysis[0].get('notice'), '2014-12345')
        self.assertEqual(second_analysis[1].get('date'), '2015-11-17')
        self.assertEqual(second_analysis[1].get('notice'), '2015-12345')

예제 #5

0

파일 보기

파일: regml.py 프로젝트: cfpb/regulations-xml-parser

def apply_notice(regulation_file, notice_file):
    """Apply the changes in a notice to a RegML regulation file.

    Both arguments are resolved to paths with ``find_file`` (the notice
    with ``is_notice=True``). The two documents are parsed, handed to
    ``get_validator``, and the notice is then applied to the regulation
    with ``process_changes`` followed by ``process_analysis``.

    The resulting tree is serialized as UTF-8 and written into the
    directory of the regulation file, using the notice file's base name.

    :param regulation_file: regulation identifier/path accepted by
        ``find_file``.
    :param notice_file: notice identifier/path accepted by ``find_file``.
    """
    # Read the RegML starting point.
    regulation_file = find_file(regulation_file)

    # Read raw bytes: lxml decodes according to the XML declaration and
    # refuses already-decoded text that still carries one.
    with open(regulation_file, 'rb') as f:
        left_reg_xml = f.read()
    parser = etree.XMLParser(huge_tree=True)
    left_xml_tree = etree.fromstring(left_reg_xml, parser)

    # Read the notice file.
    notice_file = find_file(notice_file, is_notice=True)
    with open(notice_file, 'rb') as f:
        notice_string = f.read()
    parser = etree.XMLParser(huge_tree=True)
    notice_xml = etree.fromstring(notice_string, parser)

    # Validate the files. The returned validator objects are not used
    # here; the calls are kept for whatever checking get_validator does
    # (presumably schema validation -- confirm against its definition).
    get_validator(left_xml_tree)
    get_validator(notice_xml)

    # Process the notice changeset.
    new_xml_tree = process_changes(left_xml_tree, notice_xml)

    # Add in any new analysis.
    new_xml_tree = process_analysis(new_xml_tree, notice_xml)

    # Write the new xml tree. tostring() with an explicit encoding
    # returns bytes, so the output file is opened in binary mode.
    new_xml_string = etree.tostring(new_xml_tree,
                                    pretty_print=True,
                                    xml_declaration=True,
                                    encoding='UTF-8')
    new_path = os.path.join(
        os.path.dirname(regulation_file),
        os.path.basename(notice_file))
    with open(new_path, 'wb') as f:
        print("Writing regulation to {}".format(new_path))
        f.write(new_xml_string)

예제 #6

0

파일 보기

파일: regml.py 프로젝트: cfpb/regulations-xml-parser

def _read_xml_tree(path):
    """Parse the XML file at ``path`` and return its root element.

    The file is read as bytes so that lxml decodes it according to the
    document's own XML declaration. ``huge_tree=True`` matches the parser
    settings used throughout this module's notice handling.
    """
    with open(path, 'rb') as f:
        xml_bytes = f.read()
    parser = etree.XMLParser(huge_tree=True)
    return etree.fromstring(xml_bytes, parser)


def apply_through(cfr_title, cfr_part, start=None, through=None,
                  fix_notices=False, skip_fix_notices=[],
                  skip_fix_notices_through=None):
    """Apply a sequence of notices to the initial version of a regulation.

    All locally available notices for ``cfr_part`` are collected and
    ordered (by ``settings.CUSTOM_NOTICE_ORDER`` when the part has an
    entry there, otherwise by effective date). The user selects how far
    to apply them, either interactively or through ``through``. Each
    selected notice is then applied in turn to the tree produced by the
    previous one, and every intermediate result is written next to the
    initial regulation file under the notice file's base name.

    :param cfr_title: CFR title; only used in messages here.
    :param cfr_part: CFR part whose notices and regulation are processed.
    :param start: not used by this function.
    :param through: notice document number or list index at which to
        stop; when given, the interactive prompt is skipped. An empty
        string applies all notices.
    :param fix_notices: when true, run the validator's term and citation
        fix-up steps on each notice before applying it.
    :param skip_fix_notices: document numbers to exclude from fix-up.
        The default list is never mutated (it is copied before use).
    :param skip_fix_notices_through: if this is a known document number,
        also exclude from fix-up every notice up to and including it in
        apply order.
    :returns: ``None``. Problems found while collecting notices or
        reading the selection are reported on stdout and end the call
        early without changes.
    """
    # Get list of notices that apply to this reg
    # Look for locally available notices
    regml_notice_files = find_all(cfr_part, is_notice=True)

    regml_notices = []
    for notice_file in regml_notice_files:
        file_name = notice_file

        try:
            xml_tree = _read_xml_tree(file_name)
        except etree.XMLSyntaxError as e:
            print(colored('Syntax error in {}'.format(notice_file), 'red'))
            print(e)
            return

        doc_number = xml_tree.find(
            './{eregs}preamble/{eregs}documentNumber').text
        effective_date = xml_tree.find(
            './{eregs}preamble/{eregs}effectiveDate').text
        applies_to = xml_tree.find(
            './{eregs}changeset').get('leftDocumentNumber')
        if applies_to is None:
            # Major problem here
            print(colored("Error locating"),
                  colored("leftDocumentNumber", attrs=['bold']),
                  colored("attribute in"),
                  colored("{}".format(doc_number), 'red',
                          attrs=['bold']))
            return

        regml_notices.append(
            (doc_number, effective_date, applies_to, file_name))

    if cfr_part in settings.CUSTOM_NOTICE_ORDER:
        order = settings.CUSTOM_NOTICE_ORDER[cfr_part]
        regml_notices.sort(key=lambda n: order.index(n[0]))
    else:
        regml_notices.sort(key=lambda n: n[1])

    # The smallest "applies to" document number is treated as the
    # initial version of the regulation.
    regs = sorted(nn[2] for nn in regml_notices)

    # If no notices found, issue error message
    if not regml_notices:
        print(colored("\nNo available notices for reg {} in part {}".format(cfr_part, cfr_title)))
        return

    # If initial version is not findable, issue error message
    if regs[0] is None:
        print(colored("\nError reading initial version and apply order for reg {} in part {}. No changes have been made.".format(cfr_part, cfr_title),
                      attrs=['bold']))
        return

    # Generate prompt for user
    print(colored("\nAvailable notices for reg {}:".format(cfr_part),
          attrs=['bold']))
    print("{:>3}. {:<22}(Initial version)".format(0, regs[0]))
    # Process notices found
    for position, listed_notice in enumerate(regml_notices, 1):
        print("{0:>3}. {1[0]:<22}(Effective: {1[1]})".format(position,
                                                             listed_notice))
    print()

    # Possible answers are blank (all), the numbers, or the notice names
    possible_indices = [str(kk) for kk in range(len(regml_notices) + 1)]
    possible_notices = [nn[0] for nn in regml_notices]

    # If notice number is supplied, use that one
    if through is not None:
        print("Command-line option selected notice '{}'".format(through))
        answer = through
    else:
        # Get user input to specify end version
        answer = None
        while answer not in [""] + possible_indices + possible_notices:
            answer = raw_input('Press enter to apply all or enter notice number: [all] ')

    if answer == "":
        # Apply notices
        last_ver_idx = len(regml_notices) - 1
    elif answer == "0":
        # Cancel - this is just the initial version.
        # Compared by value: identity ("is") on strings only works by
        # accident of interning.
        print(colored("CANCELED: Version", attrs=['bold']),
              colored("{}".format(regs[0]), 'yellow', attrs=['bold']),
              colored("is the initial version - no changes have been made.", attrs=['bold']))
        return
    elif answer in possible_indices:
        # Apply notices through answer-1 to adjust for the initial ver offset
        last_ver_idx = int(answer) - 1
    elif answer in possible_notices:
        # Find index to stop at in notice list
        last_ver_idx = possible_notices.index(answer)
    else:
        print(colored("ERROR: Notice", attrs=['bold']),
              colored("{}".format(answer), 'red', attrs=['bold']),
              colored("does not exist - no changes have been made.", attrs=['bold']))
        return

    print(colored("\nApplying notices through {0[0]}\n".format(regml_notices[last_ver_idx]),
          attrs=['bold']))

    # Perform the notice application process
    reg_path = os.path.abspath(os.path.join(settings.XML_ROOT,
                                            'regulation',
                                            cfr_part,
                                            '{}.xml'.format(regs[0])))
    print("Opening initial version {}".format(reg_path))
    regulation_file = find_file(reg_path)
    left_xml_tree = _read_xml_tree(regulation_file)

    # The set of notices excluded from fix-up does not depend on the
    # notice being processed, so build it once before the loop.
    skip_notices = list(skip_fix_notices)
    if (skip_fix_notices_through is not None
            and skip_fix_notices_through in possible_notices):
        last_fix_idx = possible_notices.index(skip_fix_notices_through)
        skip_notices.extend(possible_notices[:last_fix_idx + 1])

    prev_tree = left_xml_tree
    selected_notices = regml_notices[:last_ver_idx + 1]
    for kk, notice in enumerate(selected_notices, 1):
        doc_number, effective_date, prev_notice, file_name = notice

        print("[{}] Applying notice {} from {} to version {}".format(kk,
                                                                     doc_number,
                                                                     file_name,
                                                                     prev_notice))

        # Open the notice file
        notice_file = find_file(file_name, is_notice=True)
        notice_xml = _read_xml_tree(notice_file)

        # TODO: Validate labels for json-compliance?
        # Example: JSON fails on upload only for interpParagraphs without "Interp" in them

        # Validate the previous version. The returned validator is not
        # needed afterwards; the call is kept for the checking it does.
        get_validator(prev_tree)
        terms_layer = build_terms_layer(prev_tree)

        try:
            notice_validator = get_validator(notice_xml, raise_instead_of_exiting=True)
        except Exception as e:
            print("[{}]".format(kk),
                  colored("Exception occurred in notice", 'red'),
                  colored(doc_number, attrs=['bold']),
                  colored("; details are below. ", 'red'),
                  "To retry this single notice, use:\n\n",
                  colored("> ./regml.py apply-notice {0}/{1} {0}/{2}\n".format(cfr_part,
                                                                               prev_notice,
                                                                               doc_number),
                          attrs=['bold']))
            # The message above promises details, so show the exception.
            print(e)
            # NOTE(review): exit status 0 signals success to callers even
            # though the notice failed validation; kept as-is so existing
            # wrappers behave the same -- confirm whether non-zero is wanted.
            sys.exit(0)

        # validate the notice XML with the layers derived from the
        # tree of the previous version
        if fix_notices and doc_number not in skip_notices:
            print('Fixing notice number {}:'.format(doc_number))
            notice_validator.validate_terms(notice_xml, terms_layer)
            notice_validator.validate_term_references(notice_xml, terms_layer, notice_file)
            notice_terms_layer = build_terms_layer(notice_xml)
            notice_validator.validate_term_references(notice_xml, notice_terms_layer, notice_file)
            notice_validator.fix_omitted_cites(notice_xml, notice_file)

            # at this point the file has possibly changed, so we should
            # really reload it
            notice_xml = _read_xml_tree(notice_file)

        # Process the notice changeset
        try:
            new_xml_tree = process_changes(prev_tree, notice_xml)
        except Exception:
            print("[{}]".format(kk),
                  colored("Exception occurred; details are below. ", 'red'),
                  "To retry this single notice, use:\n\n",
                  colored("> ./regml.py apply-notice {0}/{1} {0}/{2}\n".format(cfr_part,
                                                                               prev_notice,
                                                                               doc_number),
                          attrs=['bold']))
            # Bare raise keeps the original traceback intact.
            raise

        # Add in any new analysis
        new_xml_tree = process_analysis(new_xml_tree, notice_xml)

        # Write the new xml tree. tostring() with an explicit encoding
        # returns bytes, so the output file is opened in binary mode.
        new_xml_string = etree.tostring(new_xml_tree,
                                        pretty_print=True,
                                        xml_declaration=True,
                                        encoding='UTF-8')
        new_path = os.path.join(
            os.path.dirname(regulation_file),
            os.path.basename(notice_file))
        with open(new_path, 'wb') as f:
            print("[{}] Writing regulation to {}".format(kk, new_path))
            f.write(new_xml_string)

        # The next notice is applied on top of this result.
        prev_tree = new_xml_tree