Code Example #1
    def en_parsers_speed(self):
        """Time every LexNLP English extractor over the same long sample text."""
        file_path = os.path.join(lexnlp_test_path, 'long_parsed_text.txt')
        with codecs.open(file_path, 'r', encoding='utf-8') as fr:
            text = fr.read()

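        # Load the geo entity and alias dictionaries required by get_geoentities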
        ge_path = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
        entities_fn = ge_path + 'geoentities.csv'
        aliases_fn = ge_path + 'geoaliases.csv'
        geo_config = list(DictionaryEntry.load_entities_from_files(entities_fn, aliases_fn))

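        # Run each extractor on the text and record its elapsed time in `times`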
        times = {}  # type: Dict[str, float]
        self.check_time(text, lambda s: list(get_amounts(s)), 'get_amounts', times)
        self.check_time(text, lambda s: list(get_acts(s)), 'get_acts', times)
        self.check_time(text, lambda s: list(get_citations(s)), 'get_citations', times)
        self.check_time(text, lambda s: list(get_conditions(s)), 'get_conditions', times)
        self.check_time(text, lambda s: list(get_constraints(s)), 'get_constraints', times)
        self.check_time(text, lambda s: list(get_copyright(s)), 'get_copyright', times)
        self.check_time(text, lambda s: list(_get_courts(s)), 'get_courts', times)
        self.check_time(text, lambda s: list(get_cusip(s)), 'get_cusip', times)
        self.check_time(text, lambda s: list(get_dates(s)), 'get_dates', times)
        self.check_time(text, lambda s: list(get_definitions(s)), 'get_definitions', times)
        self.check_time(text, lambda s: list(get_distances(s)), 'get_distances', times)
        self.check_time(text, lambda s: list(get_durations(s)), 'get_durations', times)
        self.check_time(text, lambda s: list(get_geoentities(s, geo_config)), 'get_geoentities', times)
        self.check_time(text, lambda s: list(get_money(s)), 'get_money', times)
        self.check_time(text, lambda s: list(get_percents(s)), 'get_percents', times)
        self.check_time(text, lambda s: list(get_pii(s)), 'get_pii', times)
        self.check_time(text, lambda s: list(get_ratios(s)), 'get_ratios', times)
        self.check_time(text, lambda s: list(get_regulations(s)), 'get_regulations', times)
        self.check_time(text, lambda s: list(get_trademarks(s)), 'get_trademarks', times)
        self.check_time(text, lambda s: list(get_urls(s)), 'get_urls', times)

        self.assertTrue('get_amounts' in times)
Code Example #2
    def test_parse_comission(self):
        """The sample notice cites 'Title 5 U.S.C.' sections, but get_regulations should return no matches."""
        text = """
Pursuant to section 10(d) of the Federal Advisory Committee Act, as amended, notice is hereby given of the following meetings.
The meetings will be closed to the public in accordance with the provisions set forth in sections 552b(c)(4) and 552b(c)(6), Title 5 U.S.C., as amended.
The grant applications and the discussions could disclose confidential trade secrets or commercial property such as patentable material,
and personal information concerning individuals associated with the grant applications, the disclosure of which would constitute a clearly unwarranted invasion of personal privacy.
Name of Committee: Center for Scientific Review Special Emphasis Panel; Small Business: Cancer Biotherapeutics Development.
"""
        ret = list(get_regulations(text))
        self.assertEqual(0, len(ret))
Code Example #3
    def test_regulations(self):
        """get_regulations yields (regulation_type, regulation_code) pairs for statutory citations."""
        text = 'test 123 U.S.C § 456, code'
        rs = list(get_regulations(text))
        self.assertEqual(1, len(rs))
        self.assertEqual('United States Code', rs[0][0])
        self.assertEqual('123 USC § 456', rs[0][1])

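        # With as_dict=True the same matches come back as dictionaries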
        rs = list(get_regulations(text, as_dict=True))
        self.assertEqual(1, len(rs))
        self.assertEqual('United States Code', rs[0]['regulation_type'])
        self.assertEqual('123 USC § 456', rs[0]['regulation_code'])

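        # Annotation objects expose the locale, matched name, source, and character coordinates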
        ants = list(get_regulation_annotations(text))
        self.assertEqual(1, len(ants))
        self.assertEqual('en', ants[0].locale)
        self.assertEqual('123 USC § 456', ants[0].name)
        self.assertEqual('United States Code', ants[0].source)

        start = text.find('123')
        self.assertGreater(ants[0].coords[1], ants[0].coords[0])
        self.assertEqual((start, ants[0].coords[1]), ants[0].coords)
Code Example #4
    def parse(self, text, text_unit_id, _text_unit_lang,
              **kwargs) -> ParseResults:
        found = list(regulations.get_regulations(text))
        if found:
            # each match is a (regulation_type, regulation_name) tuple;
            # collapse duplicates and keep a count per unique match
            unique = set(found)
            return ParseResults({
                RegulationUsage: [
                    RegulationUsage(text_unit_id=text_unit_id,
                                    regulation_type=item[0],
                                    regulation_name=item[1],
                                    count=found.count(item)) for item in unique
                ]
            })
Code Example #5
    def extract_features(self,
                         ex_words: List[str],
                         add_to_indexer: bool = False) -> Counter:
        """Build bag-of-words features, augmented with regulation codes extracted by LexNLP."""
        stop_words = set(EN_STOPWORDS)
        regulations = list(lexnlp.get_regulations(" ".join(ex_words)))

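        # Keep tokens that are not stopwords and contain no digits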
        base_filtered = [
            w for w in ex_words
            if w not in stop_words and not any(i.isdigit() for i in w)
        ]
        filtered = []

        for item in base_filtered:
            filtered.append(item)
            if add_to_indexer:
                self.indexer.add_and_get_index(item)

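        # Add each extracted regulation code (item[1]) as an extra feature token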
        for item in regulations:
            reg = item[1]
            filtered.append(reg)
            if add_to_indexer:
                self.indexer.add_and_get_index(reg)

        return Counter(filtered)
Code Example #6
    def getRegulations(self):
        # get_regulations yields (regulation_type, regulation_code) tuples;
        # store only the code strings on the bill record
        mem = []
        regulations = list(get_regulations(self.bill_text))
        for reg in regulations:
            mem.append(str(reg[1]))
        self.bill.info['regulations'] = mem