Python get_amounts Beispiele, lexnlp.extract.en.amounts.get_amounts Python Beispiele

Beispiel #1

0

Datei anzeigen

    def en_parsers_speed(self):
        """Run every English extractor over one long sample text via check_time.

        The sample text is read from ``long_parsed_text.txt`` under
        ``lexnlp_test_path`` and the geo-entity dictionary is loaded from the
        CSV fixtures. Each extractor is then handed to ``self.check_time``
        together with a label and the shared ``times`` dict (presumably
        ``check_time`` stores the measured duration under that label -- the
        final assertion relies on a ``'get_amounts'`` key being present).
        """
        file_path = os.path.join(lexnlp_test_path, 'long_parsed_text.txt')
        with codecs.open(file_path, 'r', encoding='utf-8') as fr:
            text = fr.read()

        ge_path = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
        entities_fn = os.path.join(ge_path, 'geoentities.csv')
        aliases_fn = os.path.join(ge_path, 'geoaliases.csv')
        geo_config = list(DictionaryEntry.load_entities_from_files(entities_fn, aliases_fn))

        times = {}  # type: Dict[str, float]

        # (label, extractor) pairs, in the order they are measured.
        # NOTE: the courts extractor is the private ``_get_courts`` but is
        # reported under the public-looking label 'get_courts'.
        extractors = (
            ('get_amounts', get_amounts),
            ('get_acts', get_acts),
            ('get_citations', get_citations),
            ('get_conditions', get_conditions),
            ('get_constraints', get_constraints),
            ('get_copyright', get_copyright),
            ('get_courts', _get_courts),
            ('get_cusip', get_cusip),
            ('get_dates', get_dates),
            ('get_definitions', get_definitions),
            ('get_distances', get_distances),
            ('get_durations', get_durations),
            ('get_geoentities', lambda s: get_geoentities(s, geo_config)),
            ('get_money', get_money),
            ('get_percents', get_percents),
            ('get_pii', get_pii),
            ('get_ratios', get_ratios),
            ('get_regulations', get_regulations),
            ('get_trademarks', get_trademarks),
            ('get_urls', get_urls),
        )

        def _materialized(extractor):
            # list() forces lazily-evaluated extractors to run to completion
            # inside the call that check_time receives. A factory function is
            # used so each lambda closes over its own extractor.
            return lambda s: list(extractor(s))

        for label, extractor in extractors:
            self.check_time(text, _materialized(extractor), label, times)

        # assertIn reports the missing key and the container on failure.
        self.assertIn('get_amounts', times)

Beispiel #2

0

Datei anzeigen

Datei: money.py Projekt: denmonz/Orrick-Flashcards

def get_money_annotations(text: str, float_digits=4) \
        -> Generator[MoneyAnnotation, None, None]:
    """Yield a MoneyAnnotation for each usable currency match in *text*.

    A match of ``CURRENCY_PTN_RE`` is skipped when none of its ``prefix``,
    ``postfix`` or ``trigger_word`` captures is present, or when
    ``get_amounts`` does not produce exactly one value for the captured
    amount.

    The currency is resolved from the lower-cased prefix (symbol map, then
    prefix map, then the upper-cased prefix itself), otherwise from the
    lower-cased postfix (token map, then the upper-cased postfix), and falls
    back to ``DEFAULT_CURRENCY`` when nothing was resolved.

    :param text: text to search
    :param float_digits: forwarded unchanged to ``get_amounts``
    :return: generator of ``MoneyAnnotation`` objects
    """
    # Characters trimmed from both ends of the matched source text: all
    # punctuation except '$', plus whitespace. Loop-invariant, so built once.
    strip_chars = string.punctuation.replace('$', '') + string.whitespace

    for match in CURRENCY_PTN_RE.finditer(text):
        capture = match.capturesdict()
        prefix = capture['prefix']
        postfix = capture['postfix']
        if not (prefix or postfix or capture['trigger_word']):
            continue
        amount = list(
            get_amounts(capture['amount'][0], float_digits=float_digits))
        if len(amount) != 1:
            continue
        if prefix:
            prefix = prefix[0].lower()
            currency_type = (CURRENCY_SYMBOL_MAP.get(prefix)
                             or CURRENCY_PREFIX_MAP.get(prefix)
                             or prefix.upper())
        elif postfix:
            currency_type = (CURRENCY_TOKEN_MAP.get(postfix[0].lower())
                             or postfix[0].upper())
        else:
            # Only a trigger word matched; no currency marker to resolve.
            currency_type = None
        if not currency_type:
            currency_type = DEFAULT_CURRENCY
        # Kept in its own local so the ``text`` parameter is never rebound.
        source_text = capture['text'][0].strip(strip_chars)
        yield MoneyAnnotation(coords=match.span(),
                              amount=amount[0],
                              text=source_text,
                              currency=currency_type)

Beispiel #3

0

Datei anzeigen

Datei: money.py Projekt: trustmeiamlawyer/lexpredict-lexnlp

def get_money(text, return_sources=False, float_digits=4) -> Generator:
    """Yield ``(amount, currency)`` tuples for currency matches in *text*.

    Matches of ``CURRENCY_PTN_RE`` without a prefix or postfix capture are
    skipped, as are matches whose amount capture does not yield exactly one
    value from ``get_amounts``. With ``return_sources`` set, the matched
    text (stripped of whitespace and of all punctuation except '$') is
    appended as a third tuple element.
    """
    for found in CURRENCY_PTN_RE.finditer(text):
        groups = found.capturesdict()
        prefixes = groups['prefix']
        postfixes = groups['postfix']
        if not prefixes and not postfixes:
            continue

        parsed = list(
            get_amounts(groups['amount'][0], float_digits=float_digits))
        if len(parsed) != 1:
            continue

        if prefixes:
            # Prefix wins: symbol map, then prefix map, then the raw prefix.
            symbol = prefixes[0].lower()
            currency = CURRENCY_SYMBOL_MAP.get(symbol)
            if not currency:
                currency = CURRENCY_PREFIX_MAP.get(symbol)
            if not currency:
                currency = symbol.upper()
        else:
            token = postfixes[0]
            currency = CURRENCY_TOKEN_MAP.get(token.lower()) or token.upper()

        if return_sources:
            trimmed = groups['text'][0].strip(
                string.punctuation.replace('$', '') + string.whitespace)
            yield parsed[0], currency, trimmed
        else:
            yield parsed[0], currency

Beispiel #4

0

Datei anzeigen

Datei: field_types.py Projekt: blewetda/lexpredict-contraxsuite

 def _extract_variants_from_text(self, field, text: str, **kwargs):
     """Return the numeric amounts found in *text*, or None if there are none.

     Only ``float``/``int`` values are kept; a value that equals its integer
     conversion is returned as an ``int``.
     """
     found = get_amounts(text, return_sources=False)
     if not found:
         return None
     normalized = []
     for value in found:
         if not isinstance(value, (float, int)):
             continue
         whole = int(value)
         normalized.append(whole if whole == value else value)
     return normalized or None

Beispiel #5

0

Datei anzeigen

    def get_all_annotations(cls,
                            text: str,
                            float_digits=4) \
            -> List[DurationAnnotation]:
        """Collect a DurationAnnotation for every duration match in *text*.

        Matching runs against the lower-cased text. A match is skipped unless
        ``get_amounts`` yields exactly one value for its number part. The
        amount is rounded to ``float_digits`` when that is truthy, and the
        plural type 'anniversaries' is reported as 'anniversary'.
        """
        annotations = []
        for found in cls.DURATION_PTN_RE.finditer(text.lower()):
            matched_text, number_part, unit = found.groups()
            parsed = list(get_amounts(number_part, float_digits=float_digits))
            if len(parsed) != 1:
                continue

            quantity = parsed[0]
            if float_digits:
                quantity = round(quantity, float_digits)
            # The map is keyed by the matched unit, so look it up before the
            # 'anniversaries' label is normalized.
            days = cls.DURATION_MAP[unit] * quantity
            label = 'anniversary' if unit == 'anniversaries' else unit

            annotations.append(
                DurationAnnotation(coords=found.span(),
                                   amount=quantity,
                                   duration_type=label,
                                   duration_days=days,
                                   text=matched_text.strip()))
        return annotations

Beispiel #6

0

Datei anzeigen

Datei: durations.py Projekt: suryak-cs/lexpredict-lexnlp

 def get_all_annotations(
     cls,
     text: str,
     float_digits: int = 4,
 ) -> List[DurationAnnotation]:
     """Collect a DurationAnnotation for every duration match in *text*.

     Matching runs against the lower-cased text. A match is skipped unless
     ``get_amounts`` yields exactly one value for its number part. The day
     count is the amount scaled by the numerator/denominator of the unit's
     ``DURATION_MAP`` entry, wrapped in ``Decimal`` and, when ``float_digits``
     is truthy, passed through ``quantize_by_float_digit``. The plural type
     'anniversaries' is reported as 'anniversary'.
     """
     collected: List[DurationAnnotation] = []
     for found in cls.DURATION_PTN_RE.finditer(text.lower()):
         matched_text, number_part, unit = found.groups()
         parsed = list(get_amounts(number_part, float_digits=float_digits))
         if len(parsed) != 1:
             continue

         quantity = parsed[0]
         per_unit = cls.DURATION_MAP[unit]
         days = Decimal(
             per_unit.numerator * quantity / per_unit.denominator)
         if float_digits:
             days = quantize_by_float_digit(
                 amount=days, float_digits=float_digits)

         label = 'anniversary' if unit == 'anniversaries' else unit
         collected.append(
             DurationAnnotation(
                 coords=found.span(),
                 amount=quantity,
                 duration_type=label,
                 duration_days=days,
                 text=matched_text.strip()))
     return collected

Beispiel #7

0

Datei anzeigen

def get_ratios(text, return_sources=False, float_digits=4) -> Generator:
    """Yield ``(left, right, left / right)`` for ratio matches in *text*.

    Matching runs against the lower-cased text. A match is skipped unless
    each side yields exactly one value from ``get_amounts`` and neither value
    is zero. Both sides are rounded to ``float_digits`` when that is truthy.
    With ``return_sources`` set, the stripped matched text is appended as a
    fourth tuple element.
    """
    for matched_text, left_text, right_text in RATIO_PTN_RE.findall(
            text.lower()):
        left_found = list(get_amounts(left_text, float_digits=float_digits))
        right_found = list(get_amounts(right_text, float_digits=float_digits))
        if not (len(left_found) == 1 and len(right_found) == 1):
            continue

        left, right = left_found[0], right_found[0]
        if left == 0 or right == 0:
            continue

        if float_digits:
            left = round(left, float_digits)
            right = round(right, float_digits)
        quotient = float(left) / right

        if return_sources:
            yield left, right, quotient, matched_text.strip()
        else:
            yield left, right, quotient

Beispiel #8

0

Datei anzeigen

 def extraction_function(self, field, possible_value, text):
     """Turn *possible_value* (or, failing that, *text*) into a number.

     Returns None when there is neither a value nor any text. Otherwise the
     value is converted with ``float()``; if that conversion fails, amounts
     are extracted with ``get_amounts`` from ``str(possible_value)`` (or from
     *text* when the value is falsy) and ``ValueExtractionHint.get_value``
     picks the result using ``field.item_number``. Returns None when no
     amounts are found.
     """
     if possible_value is None and not text:
         return None
     try:
         return float(possible_value)
     except (TypeError, ValueError, OverflowError):
         # Only the failures float() signals for unconvertible input are
         # handled; a bare except would also swallow KeyboardInterrupt and
         # SystemExit.
         pass
     possible_value = str(possible_value) if possible_value else text
     floats = list(get_amounts(possible_value, return_sources=False))
     return ValueExtractionHint.get_value(
         floats, field.item_number) if floats else None

Beispiel #9

0

Datei anzeigen

Datei: ratios.py Projekt: rohitn/lexpredict-lexnlp

def get_ratio_annotations(text: str, float_digits=4) \
        -> Generator[RatioAnnotation, None, None]:
    """Yield a RatioAnnotation for each usable ratio match in *text*.

    Matching runs against the lower-cased text. A match is skipped unless
    each side yields exactly one value from ``get_amounts`` and neither value
    is zero. Both sides are rounded to ``float_digits`` when that is truthy;
    the reported ratio is ``float(left) / right``.
    """
    for found in RATIO_PTN_RE.finditer(text.lower()):
        matched_text, left_text, right_text = found.groups()
        left_found = list(get_amounts(left_text, float_digits=float_digits))
        right_found = list(get_amounts(right_text, float_digits=float_digits))
        if not (len(left_found) == 1 and len(right_found) == 1):
            continue

        left, right = left_found[0], right_found[0]
        if left == 0 or right == 0:
            continue

        if float_digits:
            left = round(left, float_digits)
            right = round(right, float_digits)
        quotient = float(left) / right

        yield RatioAnnotation(coords=found.span(),
                              text=matched_text.strip(),
                              left=left,
                              right=right,
                              ratio=quotient)

Beispiel #10

0

Datei anzeigen

def get_distances(text, return_sources=False, float_digits=4) -> Generator:
    """Yield ``(amount, distance_type)`` for distance matches in *text*.

    Matching runs against the lower-cased text. A match is skipped unless
    ``get_amounts`` yields exactly one value for its number part. The type is
    looked up in the symbol map first, then the token map. The amount is
    rounded to ``float_digits`` when that is truthy. With ``return_sources``
    set, the stripped matched text is appended as a third tuple element.
    """
    for matched_text, number_part, unit in DISTANCE_PTN_RE.findall(text.lower()):
        parsed = list(get_amounts(number_part, float_digits=float_digits))
        if len(parsed) != 1:
            continue

        unit_name = DISTANCE_SYMBOL_MAP.get(unit) or DISTANCE_TOKEN_MAP.get(unit)
        value = parsed[0]
        if float_digits:
            value = round(value, float_digits)

        if return_sources:
            yield value, unit_name, matched_text.strip()
        else:
            yield value, unit_name

Beispiel #11

0

Datei anzeigen

def get_distance_annotations(
        text: str,
        float_digits: int = 4) -> Generator[DistanceAnnotation, None, None]:
    """Yield a DistanceAnnotation for each usable distance match in *text*.

    Matching runs against the lower-cased text. A match is skipped unless
    ``get_amounts`` yields exactly one value for its number part. The type is
    looked up in the symbol map first, then the token map.
    """
    for found in DISTANCE_PTN_RE.finditer(text.lower()):
        matched_text, number_part, unit = found.groups()
        parsed = list(get_amounts(number_part, float_digits=float_digits))
        if len(parsed) != 1:
            continue

        unit_name = DISTANCE_SYMBOL_MAP.get(unit)
        if not unit_name:
            unit_name = DISTANCE_TOKEN_MAP.get(unit)

        annotation = DistanceAnnotation(coords=found.span(),
                                        amount=parsed[0],
                                        distance_type=unit_name,
                                        text=matched_text.strip())
        yield annotation

Beispiel #12

0

Datei anzeigen

 def test_amounts(self):
     """Compare the amounts extracted from a sample clause with a fixed list."""
     text = """
     2. Amendment to Interest Rate. Beginning on February 1, 1998, and
             continuing until July 18, 2002, which is the fifth anniversary of the Loan
             conversion date, interest shall be fixed at an annual rate of 7.38%, which rate
             is equal to 200 basis points above the Bank's five-year ""Treasury Constant
             Rate"" in effect on January 23, 1998. In accordance with the Agreement, the
             interest rate shall be adjusted again on July 18, 2002.
     """
     expected = '2.0, 1.0, 1998.0, 18.0, 2002.0, 5, 7.38, 200.0, 5, 23.0, 1998.0, 18.0, 2002.0'
     # Join the string form of every extracted amount so the whole sequence
     # is compared in a single assertion.
     actual = ', '.join(map(str, get_amounts(text)))
     self.assertEqual(expected, actual)

Beispiel #13

0

Datei anzeigen

Datei: durations.py Projekt: rdamarapati/lexpredict-lexnlp

def get_durations(text, return_sources=False, float_digits=4) -> Generator:
    """Yield ``(duration_type, amount, duration_days)`` for matches in *text*.

    Matching runs against the lower-cased text. A match is skipped unless
    ``get_amounts`` yields exactly one value for its number part. The amount
    is rounded to ``float_digits`` when that is truthy, and the plural type
    'anniversaries' is reported as 'anniversary'. With ``return_sources``
    set, the stripped matched text is appended as a fourth tuple element.
    """
    for matched_text, number_part, unit in DURATION_PTN_RE.findall(text.lower()):
        parsed = list(get_amounts(number_part, float_digits=float_digits))
        if len(parsed) != 1:
            continue

        quantity = parsed[0]
        if float_digits:
            quantity = round(quantity, float_digits)
        # The map is keyed by the matched unit, so look it up before the
        # 'anniversaries' label is normalized.
        days = DURATION_MAP[unit] * quantity
        label = 'anniversary' if unit == 'anniversaries' else unit

        if return_sources:
            yield label, quantity, days, matched_text.strip()
        else:
            yield label, quantity, days

Beispiel #14

0

Datei anzeigen

 def parse(self, text, text_unit_id, _text_unit_lang,
           **kwargs) -> ParseResults:
     """Build one AmountUsage per distinct amount found in *text*.

     Amounts are extracted with ``amounts.get_amounts`` (sources returned,
     extended sources disabled). Each distinct item becomes an
     ``AmountUsage`` carrying the amount, its source string truncated to 300
     characters (None when the source is falsy) and the number of times the
     item occurred. Returns None when nothing was found.

     Usages are listed in order of first occurrence.
     """
     from collections import Counter

     # One counting pass instead of calling list.count() per distinct item,
     # which was quadratic in the number of extracted amounts.
     occurrences = Counter(
         amounts.get_amounts(text,
                             return_sources=True,
                             extended_sources=False))
     if not occurrences:
         return None
     return ParseResults({
         AmountUsage: [
             AmountUsage(text_unit_id=text_unit_id,
                         amount=item[0],
                         amount_str=item[1][:300] if item[1] else None,
                         count=count)
             for item, count in occurrences.items()
         ]
     })

Beispiel #15

0

Datei anzeigen

Datei: field_types.py Projekt: viralsteroids/lexpredict-contraxsuite

 def _extract_variants_from_text(self, field, text: str, **kwargs):
     """Return the list of amounts found in *text*, or None if there are none."""
     # Materialize before testing for emptiness. NOTE(review): if get_amounts
     # returns a generator, that object is always truthy, so testing the raw
     # return value would never select the None branch -- confirm against the
     # installed lexnlp version.
     found = list(get_amounts(text, return_sources=False) or ())
     return found or None

Beispiel #16

0

Datei anzeigen

 def _extract_variants_from_text(self, field, text: str):
     """Return the whole-number amounts found in *text*, or None if none remain.

     An amount is kept when its ``is_integer()`` method returns a truthy
     value.
     """
     found = get_amounts(text, return_sources=False)
     if not found:
         return None
     whole_numbers = []
     for number in found:
         if number.is_integer():
             whole_numbers.append(number)
     return whole_numbers or None