Python CleanText.clean Exemples, weboob.browser.filters.standard.CleanText.clean Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : pages.py Projet : sourcery-ai-bot/weboob

 def filter(self, el):
     """Parse the start time out of the schedule text of ``el``.

     The second item of ``el`` is used when there is more than one,
     otherwise the first. Its text goes through ``CleanText`` (called with
     ``['HORAIRES']`` -- presumably to drop that label; confirm against
     CleanText) and only the part before the first ' - ' is searched.

     Accepted shapes are an hour, an optional ``h`` and optional minutes,
     e.g. ``20h30`` or ``20h``; missing minutes count as 0.

     :returns: ``time(hour, minute)``
     :raises ValueError: if the text contains no digits at all
     """
     index = 1 if len(el) > 1 else 0
     content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
     a_time = content.split(' - ')[0]
     # The minutes group is optional so the hour digits are never split
     # between the two groups (a mandatory group would read "20h" as 2 and 0).
     m = re.search(r'(?P<hh>\d+)h?(?P<mm>\d+)?', a_time)
     if m is None:
         raise ValueError('no time found in %r' % a_time)
     return time(int(m.group('hh')), int(m.group('mm') or 0))

Exemple #2

0

Afficher le fichier

Fichier : pages.py Projet : Konubinix/weboob

 def filter(self, el):
     """Parse the start time out of the schedule text of ``el``.

     The second item of ``el`` is used when there is more than one,
     otherwise the first. Its text goes through ``CleanText`` (called with
     ``['HORAIRES']`` -- presumably to drop that label; confirm against
     CleanText) and only the part before the first ' - ' is searched.

     Accepted shapes are an hour, an optional ``h`` and optional minutes,
     e.g. ``20h30`` or ``20h``; missing minutes count as 0.

     :returns: ``time(hour, minute)``
     :raises ValueError: if the text contains no digits at all
     """
     index = 1 if len(el) > 1 else 0
     content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
     a_time = content.split(' - ')[0]
     # The minutes group is optional so the hour digits are never split
     # between the two groups (a mandatory group would read "20h" as 2 and 0).
     m = re.search(r'(?P<hh>\d+)h?(?P<mm>\d+)?', a_time)
     if m is None:
         raise ValueError('no time found in %r' % a_time)
     return time(int(m.group('hh')), int(m.group('mm') or 0))

Exemple #3

0

Afficher le fichier

Fichier : pages.py Projet : kyrre/weboob

 def filter(self, el):
     """Parse the start time out of the schedule text of ``el``.

     The second item of ``el`` is used when there is more than one,
     otherwise the first. Its text goes through ``CleanText`` (called with
     ``["HORAIRES"]`` -- presumably to drop that label; confirm against
     CleanText) and only the part before the first " - " is searched.

     Accepted shapes are an hour, an optional ``h`` and optional minutes,
     e.g. ``20h30`` or ``20h``; missing minutes count as 0.

     :returns: ``time(hour, minute)``
     :raises ValueError: if the text contains no digits at all
     """
     index = 1 if len(el) > 1 else 0
     content = CleanText.clean(CleanText(".", ["HORAIRES"])(el[index]))
     a_time = content.split(" - ")[0]
     # The minutes group is optional so the hour digits are never split
     # between the two groups (a mandatory group would read "20h" as 2 and 0).
     m = re.search(r"(?P<hh>\d+)h?(?P<mm>\d+)?", a_time)
     if m is None:
         raise ValueError("no time found in %r" % a_time)
     return time(int(m.group("hh")), int(m.group("mm") or 0))

Exemple #4

0

Afficher le fichier

Fichier : pages.py Projet : antibios/weboob

    def filter(self, el):
        """Parse the price out of the schedule text of ``el``.

        The second item of ``el`` is used when there is more than one,
        otherwise the first. Its text goes through ``CleanText`` (called
        with ``['HORAIRES']`` -- presumably to drop that label; confirm
        against CleanText) and only the part after the last ' - ' is
        searched for a number.

        :returns: the first number found as a float, with a decimal comma
                  read as a decimal point; ``0.0`` when there is no number
        """
        index = 1 if len(el) > 1 else 0
        content = CleanText.clean(CleanText('.', ['HORAIRES'])(el[index]))
        a_price = content.split(' - ')[-1]
        # Search the text as it is: joining its characters with spaces would
        # separate the digits and only the first digit would be returned.
        found = re.search(r"\d*,\d+|\d+", a_price)
        if found:
            return float(found.group().replace(',', '.'))

        return float(0)

Exemple #5

0

Afficher le fichier

Fichier : pages.py Projet : antibios/weboob

    def filter(self, el):
        """Build a datetime from the date that starts the text of ``el[0]``.

        The date is everything before the first ' - ' in the cleaned text;
        ``str.index`` raises ValueError when that separator is absent. Each
        (pattern, replacement) pair of ``date_util.DATE_TRANSLATE_FR`` is
        applied to it before parsing.

        When the date parses as "%A %d %B" (no year), a year is appended:
        next year if the parsed month is before the current month, the
        current year otherwise.

        :returns: the datetime parsed with "%A %d %B %Y"
        :raises ValueError: if the final text does not match that format
        """
        text = CleanText.clean(CleanText(CleanHTML('.'), ['*'])(el[0]))
        a_date = text[:text.index(' - ')]

        for pattern, replacement in date_util.DATE_TRANSLATE_FR:
            a_date = pattern.sub(replacement, a_date)

        try:
            parsed_month = datetime.strptime(a_date, "%A %d %B").month
        except ValueError:
            # Not a year-less date (it may already carry a year): keep the
            # text unchanged and let the final parse decide.
            pass
        else:
            if datetime.now().month > parsed_month:
                year = datetime.now().year + 1
            else:
                year = datetime.now().year
            a_date += u' %i' % year

        return datetime.strptime(a_date, "%A %d %B %Y")

Exemple #6

0

Afficher le fichier

Fichier : pages.py Projet : dasimon/weboob

    def next_page(self):
        """Return the request for the next page, or None when there is none.

        The pagination form (``//form[@id="paginationForm"]``) is looked up
        on ``self.page``; its cleaned text is expected to contain
        "<current> / <last>". None is returned when the form is missing,
        when no such counter is found, or when the current page is the last
        one (or beyond it).

        Otherwise the form's 'page' field is set to the next page number
        and ``form.request`` is returned.
        """
        try:
            form = self.page.get_form('//form[@id="paginationForm"]')
        except FormNotFound:
            return

        text = CleanText.clean(form.el)
        m = re.search(r'(\d+) / (\d+)', text or '')
        if not m:
            return

        cur = int(m.group(1))
        last = int(m.group(2))

        # >= rather than ==: never ask for a page past the announced last one.
        if cur >= last:
            return

        form['page'] = str(cur + 1)
        return form.request

Exemple #7

0

Afficher le fichier

Fichier : pages.py Projet : nojhan/weboob-devel

    def next_page(self):
        """Return the request for the next page, or None when there is none.

        The pagination form (``//form[@id="paginationForm"]``) is looked up
        on ``self.page``; its cleaned text is expected to contain
        "<current> / <last>". None is returned when the form is missing,
        when no such counter is found, or when the current page is the last
        one (or beyond it).

        Otherwise the form's 'page' field is set to the next page number
        and ``form.request`` is returned.
        """
        try:
            form = self.page.get_form('//form[@id="paginationForm"]')
        except FormNotFound:
            return

        text = CleanText.clean(form.el)
        m = re.search(r'(\d+) / (\d+)', text or '')
        if not m:
            return

        cur = int(m.group(1))
        last = int(m.group(2))

        # >= rather than ==: never ask for a page past the announced last one.
        if cur >= last:
            return

        form['page'] = str(cur + 1)
        return form.request

Exemple #8

0

Afficher le fichier

    def get_list(self):
        """Yield an Account for each account link found in the document.

        Every ``<a>`` whose class is "synthese_id_compte" or
        "synthese_carte_differe" is visited in document order:

        * a link without a "synthese_numero_compte" span is treated as a
          card link and appended to ``_card_links`` of the most recently
          created account (ignored if no account has been seen yet);
        * a link with such a span produces a new Account, unless it has no
          "synthese_solde" span, in which case it is skipped.

        Note that card links are appended to an Account after it has been
        yielded, so ``_card_links`` is only complete once the generator is
        exhausted.
        """
        account = None
        # Captures everything after the last '?' of a URL.
        compte_id_re = re.compile(r'.*\?(.*)$')

        for cpt in self.document.xpath(
                '//a[@class="synthese_id_compte" or @class="synthese_carte_differe"]'
        ):
            url_to_parse = cpt.xpath('@href')[0].replace("\n", "")  # link
            # account._link_id = link to the history of an account (checking or savings)
            if '/mes-comptes/livret/' in url_to_parse:
                link_id = '/fr/prive/mes-comptes/livret/consulter-situation/consulter-solde.jsp?%s' % \
                    (compte_id_re.search(url_to_parse).group(1))
            else:
                link_id = url_to_parse

            number = cpt.xpath('./span[@class="synthese_numero_compte"]')
            if not number:
                # Card link: it belongs to the last account seen, if any.
                if account is not None:
                    account._card_links.append(link_id)
                continue

            account = Account()
            # Set before anything can skip this account, so that card links
            # following a skipped account still find a list to append to.
            account._card_links = []
            account.id = self.parser.tocleanstring(number[0]).replace(
                u'N°', '')

            try:
                balance = self.parser.tocleanstring(
                    cpt.xpath('./span[contains(@class, "synthese_solde")]')[0])
            except IndexError:
                continue

            account.balance = Decimal(Transaction.clean_amount(balance))
            account.currency = account.get_currency(balance)
            account._link_id = link_id
            account.label = (' '.join([
                CleanText.clean(part) for part in cpt.xpath('./text()')
            ])).strip(' - ').strip()

            # No break: when several patterns match, the last one iterated wins.
            for pattern, account_type in self.ACCOUNT_TYPES.items():
                if pattern in account._link_id:
                    account.type = account_type

            yield account

Exemple #9

0

Afficher le fichier

Fichier : accounts_list.py Projet : dasimon/weboob

    def get_list(self):
        """Yield an Account for each account link found in the document.

        Every ``<a>`` whose class is "synthese_id_compte" or
        "synthese_carte_differe" is visited in document order:

        * a link without a "synthese_numero_compte" span is treated as a
          card link and appended to ``_card_links`` of the most recently
          created account (ignored if no account has been seen yet);
        * a link with such a span produces a new Account, unless it has no
          "synthese_solde" span, in which case it is skipped.

        Note that card links are appended to an Account after it has been
        yielded, so ``_card_links`` is only complete once the generator is
        exhausted.
        """
        account = None
        # Captures everything after the last '?' of a URL.
        compte_id_re = re.compile(r'.*\?(.*)$')

        for cpt in self.document.xpath('//a[@class="synthese_id_compte" or @class="synthese_carte_differe"]'):
            url_to_parse = cpt.xpath('@href')[0].replace("\n", "")  # link
            # account._link_id = link to the history of an account (checking or savings)
            if '/mes-comptes/livret/' in url_to_parse:
                link_id = '/fr/prive/mes-comptes/livret/consulter-situation/consulter-solde.jsp?%s' % \
                    (compte_id_re.search(url_to_parse).group(1))
            else:
                link_id = url_to_parse

            number = cpt.xpath('./span[@class="synthese_numero_compte"]')
            if not number:
                # Card link: it belongs to the last account seen, if any.
                if account is not None:
                    account._card_links.append(link_id)
                continue

            account = Account()
            # Set before anything can skip this account, so that card links
            # following a skipped account still find a list to append to.
            account._card_links = []
            account.id = self.parser.tocleanstring(number[0]).replace(u'N°', '')

            try:
                balance = self.parser.tocleanstring(cpt.xpath('./span[contains(@class, "synthese_solde")]')[0])
            except IndexError:
                continue

            account.balance = Decimal(Transaction.clean_amount(balance))
            account.currency = account.get_currency(balance)
            account._link_id = link_id
            account.label = (' '.join([CleanText.clean(part) for part in cpt.xpath('./text()')])).strip(' - ').strip()

            # No break: when several patterns match, the last one iterated wins.
            for pattern, account_type in self.ACCOUNT_TYPES.items():
                if pattern in account._link_id:
                    account.type = account_type

            yield account