def is_valid_link(link):
    '''Tell whether a link points at something other than an image.

    The same check is applied to regular and implicit style links.

    Returns False when ``link`` is falsy or has no 'page' entry, or when
    the lower-cased 'page' value (or the 'text' value, if present)
    contains one of the image extension markers anywhere in the string.
    Returns True otherwise.
    '''
    image_markers = ('.jpg', '.svg', '.png', '.gif', '.jpeg', '.bmp', '.tiff')

    def mentions_image(value):
        # Case-insensitive substring match against every known marker.
        lowered = value.lower()
        return any(_.has_substr(lowered, marker) for marker in image_markers)

    # A link without a target page can never be valid.
    if not link or 'page' not in link:
        return False

    if mentions_image(link['page']):
        return False

    # The display text is optional; it is only inspected when the page passed.
    if 'text' in link and mentions_image(link['text']):
        return False

    return True
def process_davivienda_message(message):
    '''Extract amount, category and description from a Davivienda message.

    The text in ``message.string`` is split with ``lines`` and three
    entries are read: index 6 (amount), index 7 (movement label) and
    index 8 (description, the part after the first ':').
    NOTE(review): ``message`` is presumably a parsed e-mail/HTML node;
    confirm against the caller.

    Returns:
        ``(None, None, None)`` when the description mentions 'PSE';
        otherwise ``(amount, {'type': ..., 'item': ...}, desc)`` where
        ``amount`` is the amount line with every character other than
        digits and dots removed.
    '''
    print('Davivienda: processing message')

    rows = lines(message.string)
    amount = re.sub(r'[^\d.]', '', nth(rows, 6))
    movement = nth(rows, 7)
    desc = trim(nth(split(nth(rows, 8), ':'), 1))

    # Messages whose description mentions PSE are skipped here.
    if has_substr(upper_case(desc), 'PSE'):
        print('Davivienda: Ignored PSE payment')
        return None, None, None

    movement_lowered = lower_case(movement)
    is_income = (has_substr(movement_lowered, 'deposito')
                 or has_substr(movement_lowered, 'abono'))

    if is_income:
        kind = moneylover.CATEGORY_TYPE['income']
        item = 'Others'
        # This particular description is classified as salary.
        if has_substr(upper_case(desc), 'ACH GNB SUDAMERIS'):
            item = 'Salary'
    else:
        kind = moneylover.CATEGORY_TYPE['expense']
        item = categorize(desc)

    return amount, {'type': kind, 'item': item}, desc
def is_valid_page(page):
    '''Tell whether a page is a real content page.

    Invalid pages are images or disambiguation pages that were not
    flagged at the parser level, and pages whose stripped plaintext has
    5 characters or fewer.

    Args:
        page: mapping with 'plaintext' and 'title' entries, or a falsy
            value.

    Returns:
        False when ``page`` is falsy, has no (or too little) plaintext,
        has no 'title', or when its lower-cased title contains one of
        the flag strings; True otherwise.
    '''
    flags = ('.jpg', '.svg', '.png', '.gif', '.jpeg', '.bmp', '.tiff',
             '(disambiguation)')

    if not page:
        return False

    # A missing or None 'plaintext' means "no content": report the page as
    # invalid instead of raising KeyError / AttributeError.
    plaintext = page['plaintext'] if 'plaintext' in page else ''
    if len((plaintext or '').strip()) <= 5:
        return False

    if 'title' not in page:
        return False

    # Lower the title once rather than once per flag.
    title = page['title'].lower()
    return not any(_.has_substr(title, flag) for flag in flags)
def predefined_category(text):
    '''Map a description to a fixed category by keyword lookup.

    ``text`` is normalised with ``lower_case`` and checked against the
    keywords below in order; the category of the first keyword found as
    a substring is returned. Returns None when no keyword matches.
    '''
    # Order matters: earlier entries win when several keywords match.
    keyword_categories = (
        ('une', 'Bills & Utilities'),
        ('rappi', 'Food & Beverage'),
        ('comcel', 'Bills & Utilities'),
        ('nequi', 'Nequi'),
        ('credito visa', 'Credit Card'),
        ('fiducredicorp', 'Apartment'),
        ('canon', 'Rentals'),
    )

    lowered = lower_case(text)
    for keyword, category in keyword_categories:
        if has_substr(lowered, keyword):
            return category
    return None
def process_pse_message(message):
    '''Extract amount, category and description from a PSE message.

    The last ``span`` found under ``message.table.table`` is split with
    ``lines``; the first entry (with markup tags stripped) is the
    description and the entry at index 1 holds the amount.
    NOTE(review): ``message`` looks like a parsed HTML document; confirm
    against the caller.

    Returns:
        A 4-tuple ``(amount, category, desc, visa_category)``.
        ``amount`` keeps only digits and commas from the amount entry,
        with commas then turned into dots. ``category`` is the expense
        category dict for ``desc``. ``visa_category`` is an extra
        income/'Payment' dict when the description mentions
        'credito visa', otherwise None.
    '''
    print('PSE(davivienda): processing message')

    spans = message.table.table.find_all('span')
    data = lines(last(spans))

    desc = re.sub(r'<[^<>]*>', '', nth(data))
    digits_and_commas = re.sub(r'[^\d,]', '', nth(data, 1))
    amount = digits_and_commas.replace(',', '.')

    # Visa credit payments additionally produce an income-side entry.
    visa_category = None
    if has_substr(lower_case(desc), 'credito visa'):
        visa_category = {
            'type': moneylover.CATEGORY_TYPE['income'],
            'item': 'Payment',
        }

    category = {
        'type': moneylover.CATEGORY_TYPE['expense'],
        'item': categorize(desc),
    }
    return amount, category, desc, visa_category
def test_has_substr(case, expected):
    '''Check that ``_.has_substr`` applied to the unpacked ``case`` equals ``expected``.

    NOTE(review): ``case`` and ``expected`` are presumably supplied by a
    parametrize decorator that is not visible here.
    '''
    actual = _.has_substr(*case)
    assert actual == expected