def test_bankr_courts_wo_nltk(self):
    """Exactly one court is located in the sample when simplified normalization is on."""
    sample = 'One one Bankr. E.D.N.C. two two two.'
    config = self.build_courts_config()
    # get_courts is materialised into a list so its items can be counted.
    located = list(get_courts(sample,
                              court_config_list=config,
                              simplified_normalization=True))
    self.assertEqual(1, len(located))
Esempio n. 2
0
 def parse(self, log: ProcessLogger, text, text_unit_id, text_unit_lang, **kwargs) -> ParseResults:
     """
     Locate courts in ``text`` and summarise them as ``CourtUsage`` records.

     :param log: process logger (not used in this method body)
     :param text: text passed to ``courts.get_courts``
     :param text_unit_id: value stored as ``text_unit_id`` on every produced ``CourtUsage``
     :param text_unit_lang: language passed as the single entry of ``text_languages``
     :param kwargs: additional keyword arguments (ignored here)
     :return: ``ParseResults`` wrapping ``{CourtUsage: [...]}`` with one ``CourtUsage`` per
         distinct court id and its number of occurrences; ``None`` when nothing was found
     """
     # Function-scope import so the block does not depend on file-level imports.
     from collections import Counter

     court_config = dict_data_cache.get_court_config()
     # The entity id is derived from the first element of every item get_courts yields.
     found = [dict_entities.get_entity_id(i[0])
              for i in courts.get_courts(text,
                                         court_config_list=court_config,
                                         text_languages=[text_unit_lang])]
     if found:
         # Tally once up front instead of re-scanning `found` for every distinct id.
         occurrences = Counter(found)
         unique = set(found)
         return ParseResults({CourtUsage: [CourtUsage(text_unit_id=text_unit_id,
                                                      court_id=court_id,
                                                      count=occurrences[court_id]) for court_id in unique]})
     # No courts located: keep the original fall-through result.
     return None
    def parse_courts_legacy_function(self, text: str):
        """
        Run ``get_courts`` on ``text`` with a config list built from the remote US courts CSV.

        The CSV is downloaded on every call. Each row becomes one ``entity_config`` built from
        its "Court ID", "Court Name", a constant ``0`` and the ";"-separated "Alias" column
        (an empty list when the alias cell is null).
        """
        dictionary_url = (
            "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts"
            ".csv"
        )
        courts_table = pandas.read_csv(dictionary_url)

        def split_aliases(raw_alias):
            # A null cell means the court has no aliases.
            return [] if pandas.isnull(raw_alias) else raw_alias.split(";")

        # One config object per CSV row.
        configs = [
            entity_config(record["Court ID"], record["Court Name"], 0,
                          split_aliases(record["Alias"]))
            for _, record in courts_table.iterrows()
        ]

        return get_courts(text, configs)
Esempio n. 4
0
 def parse(self, log: ProcessLogger, text, text_unit_id, text_unit_lang,
           document_initial_load: bool = False, **kwargs) -> ParseResults:
     """
     Locate courts in ``text`` and summarise them as ``CourtUsage`` records.

     :param log: process logger (not used in this method body)
     :param text: text passed to ``courts.get_courts``
     :param text_unit_id: value stored as ``text_unit_id`` on every produced ``CourtUsage``
     :param text_unit_lang: language passed as the single entry of ``text_languages``
     :param document_initial_load: accepted for interface compatibility; not read here
     :param kwargs: additional keyword arguments (ignored here)
     :return: ``ParseResults`` wrapping ``{CourtUsage: [...]}`` with one ``CourtUsage`` per
         distinct court id and its number of occurrences; ``None`` when nothing was found
     """
     # Function-scope imports, matching how the app variable is already imported here.
     from collections import Counter
     from apps.extract.app_vars import SIMPLE_LOCATOR_TOKENIZATION

     # The app variable's value is forwarded as the `simplified_normalization` flag.
     simple_norm = SIMPLE_LOCATOR_TOKENIZATION.val
     court_config = dict_data_cache.get_court_config()
     # The court id is read from the first element of every item get_courts yields.
     found = [i[0].id
              for i in courts.get_courts(text,
                                         court_config_list=court_config,
                                         text_languages=[text_unit_lang],
                                         simplified_normalization=simple_norm)]
     if found:
         # Tally once up front instead of re-scanning `found` for every distinct id.
         occurrences = Counter(found)
         unique = set(found)
         return ParseResults({CourtUsage: [CourtUsage(text_unit_id=text_unit_id,
                                                      court_id=court_id,
                                                      count=occurrences[court_id]) for court_id in unique]})
     # No courts located: keep the original fall-through result.
     return None
    def parse_courts_legacy_function(self, text: str):
        """
        Run ``get_courts`` on ``text`` with ``DictionaryEntry`` configs built from the remote CSV.

        The CSV is downloaded on every call. Each row yields one ``DictionaryEntry`` whose id is
        the integer "Court ID", whose name is "Court Name", with priority 0, ``name_is_alias``
        set, and one ``DictionaryEntryAlias`` per ";"-separated piece of the "Alias" column
        (no aliases when that cell is null).
        """
        dictionary_url = (
            "https://raw.githubusercontent.com/LexPredict/lexpredict-legal-dictionary/1.0.2/en/legal/us_courts"
            ".csv"
        )
        courts_table = pandas.read_csv(dictionary_url)

        # Build one dictionary entry per CSV row.
        entries = []
        for _, record in courts_table.iterrows():
            raw_alias = record['Alias']
            alias_objects = (
                []
                if pandas.isnull(raw_alias)
                else [DictionaryEntryAlias(piece) for piece in raw_alias.split(';')]
            )
            entries.append(
                DictionaryEntry(id=int(record['Court ID']),
                                name=record['Court Name'],
                                priority=0,
                                name_is_alias=True,
                                aliases=alias_objects))

        return get_courts(text, entries)
 def test_bankr_courts(self):
     """Exactly one court is located in the sample with the default get_courts options."""
     sample = 'One one Bankr. E.D.N.C. two two two.'
     config = self.build_courts_config()
     # get_courts is materialised into a list so its items can be counted.
     located = list(get_courts(sample, court_config_list=config))
     self.assertEqual(1, len(located))
Esempio n. 7
0
 def extract_courts(self, text=None):
     """
     Return the items produced by ``lex_courts.get_courts`` as a list.

     :param text: text to scan; when falsy (``None`` or empty) ``self.text`` is used instead
     """
     source_text = text or self.text
     return list(lex_courts.get_courts(source_text))
Esempio n. 8
0
 def parse_courts_legacy_function(self, text: str):
     """Run ``get_courts`` on ``text`` using the config list from ``self.load_en_courts()``."""
     return get_courts(text, self.load_en_courts())