Exemple #1
0
    def en_parsers_speed(self):
        """
        Run every English extractor over one long sample document via check_time.

        The sample text is read from ``long_parsed_text.txt`` under the test
        directory and the geo-entity dictionary needed by ``get_geoentities``
        is loaded from the two CSV files. Each extractor is wrapped in a
        lambda that drains its result with ``list()`` and is handed to
        ``self.check_time`` together with a label and the shared ``times``
        dictionary (presumably filled with one timing per label -- confirm
        against ``check_time``).
        """
        sample_path = os.path.join(lexnlp_test_path, 'long_parsed_text.txt')
        with codecs.open(sample_path, 'r', encoding='utf-8') as sample_file:
            text = sample_file.read()

        geo_dir = os.path.join(lexnlp_test_path, 'lexnlp/extract/en/tests/test_geoentities/')
        geo_config = list(DictionaryEntry.load_entities_from_files(
            geo_dir + 'geoentities.csv',
            geo_dir + 'geoaliases.csv'))

        times = {}  # type: Dict[str, float]

        # (label, runner) pairs in execution order; list() forces each
        # extractor to produce its full output inside the measured call.
        timed_parsers = [
            ('get_amounts', lambda s: list(get_amounts(s))),
            ('get_acts', lambda s: list(get_acts(s))),
            ('get_citations', lambda s: list(get_citations(s))),
            ('get_conditions', lambda s: list(get_conditions(s))),
            ('get_constraints', lambda s: list(get_constraints(s))),
            ('get_copyright', lambda s: list(get_copyright(s))),
            ('get_courts', lambda s: list(_get_courts(s))),
            ('get_cusip', lambda s: list(get_cusip(s))),
            ('get_dates', lambda s: list(get_dates(s))),
            ('get_definitions', lambda s: list(get_definitions(s))),
            ('get_distances', lambda s: list(get_distances(s))),
            ('get_durations', lambda s: list(get_durations(s))),
            ('get_geoentities', lambda s: list(get_geoentities(s, geo_config))),
            ('get_money', lambda s: list(get_money(s))),
            ('get_percents', lambda s: list(get_percents(s))),
            ('get_pii', lambda s: list(get_pii(s))),
            ('get_ratios', lambda s: list(get_ratios(s))),
            ('get_regulations', lambda s: list(get_regulations(s))),
            ('get_trademarks', lambda s: list(get_trademarks(s))),
            ('get_urls', lambda s: list(get_urls(s))),
        ]
        for label, runner in timed_parsers:
            self.check_time(text, runner, label, times)

        self.assertTrue('get_amounts' in times)
Exemple #2
0
def get_salary(text, return_source=False):
    """
    Pick the salary amount mentioned in ``text``, if there is one.

    The text must contain a salary trigger phrase and at least one time-unit
    phrase from ``TRIGGER_LIST_TIME_UNIT`` (entries are indexed as
    ``[0]`` = phrase to look for, ``[1]`` = multiplier to a yearly figure).
    The money item with the largest first element is selected and accepted
    only if that value times the smallest matched multiplier exceeds the
    annual floor.

    :param text: text to inspect
    :param return_source: when true, ``text`` is appended to the result
    :return: ``(money, time_unit)`` or ``(money, time_unit, text)``;
        ``None`` when no qualifying salary is found
    """
    salary_triggers = ("salary", "rate of pay")
    # sample is mostly executives, so this floor is safe
    annual_floor = 20000

    # any() stops at the first trigger that matches, like the loop-and-break form
    if not any(findWholeWordorPhrase(trigger)(text) is not None
               for trigger in salary_triggers):
        return None

    multipliers = [
        entry[1]
        for entry in TRIGGER_LIST_TIME_UNIT
        if findWholeWordorPhrase(entry[0])(text) is not None
    ]
    if not multipliers:
        return None
    smallest_unit = min(multipliers)

    candidates = list(get_money(text))
    if not candidates:
        return None

    best = max(candidates, key=lambda item: item[0])
    if best[0] * smallest_unit > annual_floor:
        if return_source:
            return best, smallest_unit, text
        return best, smallest_unit
    return None
 def test_get_money_problem1(self):
     """
     Regression check: both decimal digits of an amount are kept.

     get_money() used to report 23.6 instead of 23.62 for input like this.
     """
     sample = 'Exercise Price per Share: 23.62'
     first_hit = list(get_money(sample, return_sources=False, float_digits=6))[0]
     self.assertEqual(first_hit[0], 23.62)
Exemple #4
0
    def test_money(self):
        """
        Check get_money() and get_money_annotations() on the same sample text.

        Both extractors are expected to report exactly one result, and the
        single annotation must carry locale 'en', currency 'USD' and
        amount 100.0.
        """
        text = "100 bucks, 100 dollars, 100 greens"
        ds = list(get_money(text))
        self.assertEqual(1, len(ds))

        ants = list(get_money_annotations(text))
        # Verify the annotation count itself; this assertion previously
        # re-checked ``ds`` and so never validated ``ants``.
        self.assertEqual(1, len(ants))
        self.assertEqual('en', ants[0].locale)
        self.assertEqual('USD', ants[0].currency)
        self.assertEqual(100.0, ants[0].amount)
 def test_get_money_order(self):
     """
     Regression check: get_money() yields amounts in document order.

     An earlier version returned the amounts reversed, so the first result
     must be the first amount that appears in the text.
     """
     # Whitespace (leading space, line break, indentation) is spelled out
     # explicitly so the sample stays identical to the multi-line original.
     lease_clause = (
         ' $96,844.00 per month ($31.00 per square foot per year), '
         'beginning on the date which is 90 days after \n'
         '     the Commencement Date and ending on the Expiration Date.'
     )
     amounts = list(get_money(lease_clause, return_sources=False, float_digits=6))
     self.assertEqual(amounts[0][0], 96844.0)
Exemple #6
0
 def parse(self, text, text_unit_id, _text_unit_lang,
           **kwargs) -> ParseResults:
     """
     Extract money mentions from ``text`` and wrap them as CurrencyUsage rows.

     Identical mentions are collapsed into a single CurrencyUsage whose
     ``count`` is the number of times that mention was found. Rows are
     emitted in order of first occurrence.

     :param text: text to scan for money amounts
     :param text_unit_id: identifier stored on every CurrencyUsage produced
     :param _text_unit_lang: unused here
     :param kwargs: unused here
     :return: ParseResults keyed by CurrencyUsage, or ``None`` when no money
         is found
     """
     from collections import Counter

     found = list(money.get_money(text, return_sources=True))
     if not found:
         return None

     # One pass tallies duplicates; calling found.count() per unique item
     # was quadratic and iterated a set whose order varies between runs.
     occurrences = Counter(found)
     return ParseResults({
         CurrencyUsage: [
             CurrencyUsage(text_unit_id=text_unit_id,
                           amount=item[0],
                           amount_str=item[2],
                           currency=item[1],
                           count=count) for item, count in occurrences.items()
         ]
     })
 def _extract_variants_from_text(self, field, text: str, **kwargs):
     """
     Return the money amounts found in ``text`` as currency/amount dicts.

     :param field: unused here
     :param text: text to scan for money amounts
     :param kwargs: unused here
     :return: list of ``{'currency': ..., 'amount': ...}`` dicts, or ``None``
         when no money is found
     """
     # Materialize the result first: a lazy iterator is truthy even when it
     # yields nothing, so the emptiness check below would never fire.
     money = list(get_money(text, return_sources=False))
     if not money:
         return None
     return [{'currency': m[1], 'amount': m[0]} for m in money]
Exemple #8
0
 def getMoney(self):
     """
     Record the amounts found in the bill text.

     Stores the string form of the first element of every get_money()
     result for ``self.bill_text`` under ``self.bill.info['money']``.
     """
     self.bill.info['money'] = [
         str(hit[0]) for hit in get_money(self.bill_text)
     ]
Exemple #9
0
# Collect the matched text of every span wider than six characters;
# elem[1] of each entry is used as a (start, end) pair of offsets into ``text``.
rep_date_list = [
    text[bounds[0]:bounds[1]]
    for bounds in (found[1] for found in elems)
    if (bounds[1] - bounds[0]) > 6
]
for i in rep_date_list:
    text = text.replace(i, ' <DATE> ')
# Mask whatever the ``dates1`` pattern still matches after the replacements above.
text = re.sub(dates1, ' <DATE> ', text)

start_time = time.time()

elems = list(money.get_money(text, return_sources=True))
print("Money")
print(elems)

# The last element of each result is the matched source text; de-duplicate
# it so every distinct string is replaced only once.
rep_money_list = list({elem[-1] for elem in elems})

for i in rep_money_list:
    text = text.replace(i, ' <MON> ')

# Elapsed seconds for the money extraction and masking step.
print(time.time() - start_time)

# rep_amt_list=[]
# elems=list(amounts.get_amounts(text, return_sources=True))
# print(elems)