예제 #1
0
    def en_parsers_speed(self):
        file_path = os.path.join(lexnlp_test_path, 'long_parsed_text.txt')
        with codecs.open(file_path, 'r', encoding='utf-8') as fr:
            text = fr.read()

        ge_path = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
        entities_fn = ge_path + 'geoentities.csv'
        aliases_fn = ge_path + 'geoaliases.csv'
        geo_config = list(DictionaryEntry.load_entities_from_files(entities_fn, aliases_fn))

        times = {}  # type: Dict[str, float]
        self.check_time(text, lambda s: list(get_amounts(s)), 'get_amounts', times)
        self.check_time(text, lambda s: list(get_acts(s)), 'get_acts', times)
        self.check_time(text, lambda s: list(get_citations(s)), 'get_citations', times)
        self.check_time(text, lambda s: list(get_conditions(s)), 'get_conditions', times)
        self.check_time(text, lambda s: list(get_constraints(s)), 'get_constraints', times)
        self.check_time(text, lambda s: list(get_copyright(s)), 'get_copyright', times)
        self.check_time(text, lambda s: list(_get_courts(s)), 'get_courts', times)
        self.check_time(text, lambda s: list(get_cusip(s)), 'get_cusip', times)
        self.check_time(text, lambda s: list(get_dates(s)), 'get_dates', times)
        self.check_time(text, lambda s: list(get_definitions(s)), 'get_definitions', times)
        self.check_time(text, lambda s: list(get_distances(s)), 'get_distances', times)
        self.check_time(text, lambda s: list(get_durations(s)), 'get_durations', times)
        self.check_time(text, lambda s: list(get_geoentities(s, geo_config)), 'get_geoentities', times)
        self.check_time(text, lambda s: list(get_money(s)), 'get_money', times)
        self.check_time(text, lambda s: list(get_percents(s)), 'get_percents', times)
        self.check_time(text, lambda s: list(get_pii(s)), 'get_pii', times)
        self.check_time(text, lambda s: list(get_ratios(s)), 'get_ratios', times)
        self.check_time(text, lambda s: list(get_regulations(s)), 'get_regulations', times)
        self.check_time(text, lambda s: list(get_trademarks(s)), 'get_trademarks', times)
        self.check_time(text, lambda s: list(get_urls(s)), 'get_urls', times)

        self.assertTrue('get_amounts' in times)
    def test_distances_words(self):
        text = 'Today I ran eight kilometers.'
        ds = list(get_distances(text))
        self.assertEqual(1, len(ds))

        ant = list(get_distance_annotations(text))[0]
        self.assertEqual((11, 29), ant.coords)
        cite = ant.get_cite()
        self.assertEqual('/en/distance/8/kilometer', cite)
    def test_distances_digits(self):
        text = 'Today I ran 8 miles.'
        ds = list(get_distances(text))
        self.assertEqual(1, len(ds))

        ant = list(get_distance_annotations(text))[0]
        self.assertEqual((12, 20), ant.coords)
        cite = ant.get_cite()
        self.assertEqual('/en/distance/8.0/mile', cite)
예제 #4
0
 def parse(self, text, text_unit_id, _text_unit_lang,
           **kwargs) -> ParseResults:
     found = list(distances.get_distances(text, return_sources=True))
     if found:
         unique = set(found)
         return ParseResults({
             DistanceUsage: [
                 DistanceUsage(text_unit_id=text_unit_id,
                               amount=item[0],
                               amount_str=item[2],
                               distance_type=item[1],
                               count=found.count(item)) for item in unique
             ]
         })