Example #1
0
 def obj_photos(self):
     """Return one ``HousingPhoto`` per image found in the offer slider.

     Every ``<img>`` below a ``div`` carrying the ``OfferSlider`` class is
     used; the ``640/480`` fragment of its ``src`` is replaced by
     ``800/600`` before the URL is wrapped.
     """
     return [
         HousingPhoto(Attr('.', 'src')(img).replace('640/480', '800/600'))
         for img in self.xpath('//div[has-class("OfferSlider")]//img')
     ]
Example #2
0
    def update_origin_account(self, origin_account):
        """Copy transfer-related fields from the page onto ``origin_account``.

        Each ``<li>`` under the ``idCptFrom`` list carries a JSON payload in
        its ``data-comptecomplet`` attribute.  The first entry whose label
        and IBAN both match ``origin_account`` is used to fill the account's
        private attributes; if no entry matches, a warning is logged.
        """
        # (attribute set on the account, key read from the JSON payload),
        # applied in this order.
        field_map = (
            ('_currency_code', 'codeDevise'),
            ('_formatted_iban', 'ibanFormateCompte'),
            ('_min_amount', 'montantMin'),
            ('_max_amount', 'montantMax'),
            ('_decimal_code', 'codeDecimal'),
            ('_manage_counter', 'guichetGestionnaire'),
            ('_account_title', 'intituleCompte'),
            ('_bic', 'bicCompte'),
            ('_id_service', 'idPrestation'),
            ('_product_code', 'codeProduit'),
            ('_underproduct_code', 'codeSousProduit'),
        )

        for entry in self.doc.xpath('//ul[@id="idCptFrom"]//li'):
            # get all account data
            raw = Attr('.', 'data-comptecomplet')(entry)
            payload = json.loads(raw.replace('"', '"'))

            same_label = origin_account.label == CleanText().filter(payload['libelleCompte'])
            if not (same_label and origin_account.iban == payload['ibanCompte']):
                continue

            for attr_name, key in field_map:
                setattr(origin_account, attr_name, payload[key])
            break
        else:
            # some accounts are not able to do transfer
            self.logger.warning('Account %s not found on transfer page', origin_account.label)
Example #3
0
            def parse(self, el):
                """Decode the HTML fragment embedded in ``el`` and fill ``self.env``.

                The fragment is extracted from ``el.text`` with ``self.extract``,
                un-escaped and parsed with lxml.  ``name``, ``object`` and ``id``
                are read from the popup header; ``datetime``, ``levelvalue``,
                ``flowvalue`` and ``forecast`` are read from the table rows when
                the matching row is present.  ``alarm`` is always set to ``None``.
                """
                raw = self.extract.match(el.text).group("html")
                raw = raw.replace('\\"', '"').replace('\\n', '').replace('\\/', '/')
                parsed = lxml.html.fromstring(raw)

                self.env['name'] = CleanText('.//span[@class="popUpTitleBold"]')(parsed)
                self.env['object'] = CleanText('.//span[@class="popUpTitleNormal"]')(parsed).strip(' /')
                url = Attr('.//div[@class="popUpMsDiagramm"]/img', 'src')(parsed)
                self.env['id'] = url.split('_')[1]

                for tr in parsed.xpath('.//tr'):
                    cells = tr.xpath('.//td')
                    if not cells:
                        continue
                    # lxml gives ``text`` as None when a cell has no leading text
                    # (e.g. it starts with a child element); treat that as an
                    # empty label instead of failing on the ``in`` test.
                    label = cells[0].text or ''
                    if len(cells) == 1 and "Datum" in label:
                        parts = label.split()[1:3]
                        self.env['datetime'] = "%s %s" % (parts[0], parts[1])
                    elif len(cells) == 2:
                        if "Wasserstand" in label:
                            self.env['levelvalue'] = cells[1].text.split()[0]
                        elif "Durchfluss" in label:
                            self.env['flowvalue'] = cells[1].text.split()[0]
                        elif "Tendenz" in label:
                            try:
                                # keep only the file name of the trend image
                                self.env['forecast'] = Attr('img', 'src')(cells[1]).split("/")[-1]
                            except ParseError:
                                self.env['forecast'] = None
                # TODO
                self.env['alarm'] = None
Example #4
0
    def get_history_jid(self):
        """Return the second ``:``-separated part of the first ``index:j_id…`` span id.

        Returns ``None`` when more than one ``index:panelASV`` span is found.
        """
        asv_panels = self.doc.xpath('//span[@id="index:panelASV"]')
        if len(asv_panels) > 1:
            # Assurance Vie, we do not support this kind of account.
            return None

        element_id = Attr('//span[starts-with(@id, "index:j_id")]', 'id')(self.doc)
        return element_id.split(':')[1]
Example #5
0
 def obj_DPE(self):
     """Return the ``ENERGY_CLASS`` member encoded in the energy box CSS classes.

     The ``class`` attribute of the ``energy-rank`` div is scanned for an
     ``energy-rank-<letter>`` entry; the upper-cased letter is looked up on
     ``ENERGY_CLASS``.  ``NotAvailable`` is returned when the element is
     missing, when no such entry exists, or when the letter is not an
     ``ENERGY_CLASS`` attribute.
     """
     classes = Attr(
         '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
         'class',
         default=""
     )(self)
     # Take the first matching class; fall back to "" (which getattr cannot
     # resolve) rather than indexing an empty list.
     rank = next(
         (name.replace("energy-rank-", "").upper()
          for name in classes.split() if name.startswith("energy-rank-")),
         ""
     )
     return getattr(ENERGY_CLASS, rank, NotAvailable)
Example #6
0
 def obj_photos(self):
     """Return the offer picture as a list holding at most one ``HousingPhoto``.

     The image ``src`` has ``400x267`` replaced by ``800x600`` and is then
     joined with the page URL.  An empty ``src`` yields an empty list.
     """
     src = Attr('.//div[has-class("offer-picture")]//img', 'src')(self)
     if not src:
         return []

     resized = src.replace('400x267', '800x600')
     # Ensure URL is absolute
     absolute = urljoin(self.page.url, resized)
     return [HousingPhoto(absolute)]
Example #7
0
 def obj_photos(self):
     """Return the photo referenced by ``data-img`` or ``NotLoaded``.

     The attribute value is URL-unquoted.  When it contains an
     ``http://`` occurrence starting at index 3 or later, only the part
     from that occurrence up to the last ``?`` (or the end of the string
     when there is no ``?``) is kept.
     """
     url = Attr('.', 'data-img', default=None)(self)
     if not url:
         return NotLoaded

     url = unquote(url)
     start = url.find("http://", 3)
     if start != -1:
         query_at = url.rfind("?")
         url = url[start:] if query_at == -1 else url[start:query_at]
     return [HousingPhoto(url)]
Example #8
0
    def login(self, login, password):
        """Validate ``login`` against the form's own rules, then submit the form.

        The maximum length and the e-mail pattern are read from the
        ``data-val-*`` attributes of the ``Email`` input.

        :param login: e-mail address written to the ``Email`` field
        :param password: value written to the ``Password`` field
        :raises BrowserIncorrectPassword: if ``login`` is longer than the
            advertised maximum or does not match the advertised pattern; the
            message is taken from the corresponding ``data-val-*`` attribute
        """
        # The attribute value is text; convert it so it can be compared
        # with len(login) (int vs str comparison is a TypeError on Python 3).
        maxlength = int(Attr('//input[@id="Email"]', 'data-val-maxlength-max')(self.doc))
        regex = Attr('//input[@id="Email"]', 'data-val-regex-pattern')(self.doc)
        # their regex is: ^([\w\-+\.]+)@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([\w-]+\.)+))([a-zA-Z]{2,15}|[0-9]{1,3})(\]?)$
        # but it is not very good, we escape - inside [] to avoid bad character range Exception
        regex = regex.replace(r'[\w-+\.]', r'[\w\-+\.]')

        if len(login) > maxlength:  # actually it's 60 char
            raise BrowserIncorrectPassword(Attr('//input[@id="Email"]', 'data-val-maxlength')(self.doc))

        if not re.match(regex, login):
            raise BrowserIncorrectPassword(Attr('//input[@id="Email"]', 'data-val-regex')(self.doc))

        form = self.get_form(xpath='//form[contains(@action, "/Login/Login")]')
        form['Email'] = login
        form['Password'] = password
        form.submit()
Example #9
0
 def obj_level(self):
     """Map the CSS classes of the current line's div to a disturbance level.

     The div is looked up by ``self.env['line']``.  The known class names
     are tested in a fixed order and the first one present decides the
     result; ``None`` is returned when none of them is present.
     """
     css_classes = Attr(
         u'//*[@class="lignes"]//div[@id="%s"]' % self.env[u'line'],
         attr='class'
     )(self).split()

     # Tested top to bottom; first hit wins.
     levels = (
         (u"perturb_critique_trav", CRITICAL_AND_WORK),
         (u"perturb_critique", CRITICAL),
         (u"perturb_alerte_trav", ALERT_AND_WORK),
         (u"perturb_alerte", ALERT),
         (u"perturb_normal_trav", NORMAL_AND_WORK),
         (u"perturb_normal", NORMAL),
     )
     for css_class, level in levels:
         if css_class in css_classes:
             return level
     return None
Example #10
0
    def iter_internal_recipients(self):
        """Yield one ``Recipient`` per entry of the ``idCmptToInterne`` list.

        Each ``<li>`` carries a JSON payload in ``data-comptecomplet``; the
        IBAN is used both as recipient id and iban.  Nothing is yielded when
        the list is absent from the page.
        """
        if not self.doc.xpath('//ul[@id="idCmptToInterne"]'):
            return

        for entry in self.doc.xpath('//ul[@id="idCmptToInterne"]/li'):
            raw = Attr('.', 'data-comptecomplet')(entry)
            payload = json.loads(raw.replace('"', '"'))

            recipient = Recipient()
            recipient.category = 'Interne'
            iban = payload['ibanCompte']
            recipient.id = iban
            recipient.iban = iban
            recipient.label = payload['libelleCompte']
            recipient.enabled_at = date.today()

            recipient._formatted_iban = payload['ibanFormateCompte']
            recipient._account_title = payload['intituleCompte']
            recipient._bic = payload['bicCompte']
            # These three are always initialised to empty strings here.
            recipient._ref = ''
            recipient._code_origin = ''
            recipient._created_date = ''

            yield recipient
Example #11
0
        class item(ItemElement):
            """Item element producing one ``Housing`` per listing teaser."""
            klass = Housing

            # "<type>:<reference>": type comes from the environment, reference
            # from the ``data-reference`` attribute of a span.
            obj_id = Format(
                '%s:%s', Env('type'),
                Attr('.//span[boolean(@data-reference)]', 'data-reference'))

            def obj_url(self):
                """Return the teaser title link joined with the browser's BASEURL."""
                return urljoin(
                    self.page.browser.BASEURL,
                    Link('.//h3[has-class("TeaserOffer-title")]/a')(self))

            obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
            # Area: leading number of the "surfaceDesBiens" paragraph text.
            obj_area = CleanDecimal(
                Regexp(
                    CleanText(
                        './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
                    ), r'(\d*\.*\d*) .*'))
            # Cost: leading digits/spaces/dots of the price text.
            obj_cost = CleanDecimal(
                Regexp(
                    CleanText('.//strong[has-class("TeaserOffer-price-num")]'),
                    r'([\d \.]*) .*'))
            obj_price_per_meter = PricePerMeterFilter()
            # Currency: the single character following the numeric part of the price.
            obj_currency = Regexp(
                CleanText('.//strong[has-class("TeaserOffer-price-num")]'),
                r'[\d \.]* (.) .*')
            obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
            obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')

            def obj_photos(self):
                """Return a one-element list holding the teaser image ``src`` string."""
                return [
                    Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src')(self)
                ]

            # NOTE: evaluated once, when this class body is executed, not per item.
            obj_date = datetime.date.today()

            def obj_utilities(self):
                """Return INCLUDED when the price text mentions "charges comprises", else EXCLUDED."""
                price = CleanText(
                    './/strong[has-class("TeaserOffer-price-num")]')(self)
                if "charges comprises" in price.lower():
                    return UTILITIES.INCLUDED
                else:
                    return UTILITIES.EXCLUDED

            obj_rooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]'
            )
            obj_bedrooms = NotAvailable

            def obj_details(self):
                """Return a dict with the availability date and the price mentions.

                ``dispo`` is parsed from the ``data-dispo`` attribute, with
                today's ISO date used as the default value; ``priceMentions``
                is the text of the price-mentions span.
                """
                return {
                    "dispo":
                    Date(
                        Attr('.//span[boolean(@data-dispo)]',
                             'data-dispo',
                             default=datetime.date.today().isoformat()))(self),
                    "priceMentions":
                    CleanText(
                        './/span[has-class("TeaserOffer-price-mentions")]')(
                            self)
                }
Example #12
0
 def obj_photos(self):
     """Return a one-element list holding the teaser image ``src`` value."""
     teaser_src = Attr('.//a[has-class("TeaserOffer-ill")]/img', 'src')(self)
     return [teaser_src]
Example #13
0
 def obj_photos(self):
     """Return the ``src`` of every slider thumbnail image, in document order."""
     thumbnails = self.xpath('//li[has-class("OfferSlider-thumbs-item")]/img')
     return [Attr('.', 'src')(thumb) for thumb in thumbnails]
Example #14
0
 def obj_photos(self):
     """Return the item image wrapped in ``HousingPhoto``, or an empty list.

     The list is empty when the image element is missing or its ``src``
     is empty.
     """
     src = Attr('./div[@class="item_image"]/span/span/img', 'src', default=None)(self)
     return [HousingPhoto(src)] if src else []
Example #15
0
 def condition(self):
     """Tell whether this element's ``data-fact_ligne`` equals ``env['subid']``.

     A missing attribute is treated as the empty string.
     """
     line_ref = Attr('.', 'data-fact_ligne', default='')(self)
     wanted = self.env['subid']
     return wanted == line_ref
Example #16
0
 def next_page(self):
     """Return the current page URL with ``page`` set from the next-page link.

     The value comes from the ``data`` attribute of the ``next-page`` anchor.
     """
     next_value = Attr('//a[@id="next-page"]', 'data')(self)
     current_url = self.page.url
     return add_qs(current_url, page=next_value)
Example #17
0
 def obj_code(self):
     """Extract the code from the ``onclick`` handler of the label cell's link.

     The code is the run of characters (excluding ``'`` and ``_``) that
     follows a comma, whitespace and an opening single quote.  Returns
     ``NotAvailable`` when the handler does not match.
     """
     links = TableCell('label')(self)[0].xpath('.//a')
     onclick = Attr(None, 'onclick').filter(links)
     # Raw string: same pattern as before, without the invalid "\s" escape
     # that a plain literal triggers a warning for.
     m = re.search(r",\s+'([^'_]+)", onclick)
     return m.group(1) if m else NotAvailable
Example #18
0
 def next_page(self):
     """Build the POST request for the "suivant" link, or return ``None``.

     ``None`` is returned when the link is missing or its id is empty.
     """
     link_id = Attr('//a[@title="suivant"]', 'id', default=None)(self.page.doc)
     if not link_id:
         return None

     form = self.page.get_history_form(link_id)
     return requests.Request("POST", form.url, data=dict(form))
Example #19
0
 def get_performance_url(self):
     """Return the ``data-href`` of the link inside the first ``presentation`` item.

     ``None`` is returned when the link cannot be found.
     """
     first_tab_href = Attr('(//li[@role="presentation"])[1]//a', 'data-href', default=None)
     return first_tab_href(self.doc)
Example #20
0
 def go_start(self):
     """Submit the history form for the "debut"/"precedent" link when present.

     Does nothing when no such link (or no id on it) is found.
     """
     link_id = Attr('//a[@title="debut" or @title="precedent"]', 'id', default=None)(self.doc)
     if not link_id:
         return
     self.get_history_form(link_id).submit()
Example #21
0
 def get_multi(self):
     """Return the ``value`` of every option of the ``ComboEntreprise`` select."""
     values = []
     for option in self.doc.xpath('//select[@class="ComboEntreprise"]/option'):
         values.append(Attr('.', 'value')(option))
     return values
Example #22
0
    def parse(self, el):
        """Fill ``self.env`` with the investment data of the current row.

        ``unitvalue``, ``vdate``, ``_link`` and ``asset_category`` are set
        first.  ``code`` and ``code_type`` are then set on every path: either
        from the URL of the fund detail page, from a ``CodePage`` opened by
        following that URL, or to ``NotAvailable``.
        """
        # Trying to find vdate and unitvalue
        unitvalue, vdate = None, None
        for span in TableCell('label')(self)[0].xpath('.//span'):
            # Each value is searched only until a first match is found.
            if unitvalue is None:
                unitvalue = Regexp(CleanText('.'), '^([\d,]+)$', default=None)(span)
            if vdate is None:
                # Spans whose parent div mentions a maturity date are not used as vdate.
                vdate = None if any(x in CleanText('./parent::div')(span) for x in ["échéance", "Maturity"]) else \
                        Regexp(CleanText('.'), '^([\d\/]+)$', default=None)(span)
        self.env['unitvalue'] = MyDecimal().filter(unitvalue) if unitvalue else NotAvailable
        self.env['vdate'] = Date(dayfirst=True).filter(vdate) if vdate else NotAvailable
        self.env['_link'] = None
        self.env['asset_category'] = NotAvailable

        page = None
        link_id = Attr(u'.//a[contains(@title, "détail du fonds")]', 'id', default=None)(self)
        inv_id = Attr('.//a[contains(@id, "linkpdf")]', 'id', default=None)(self)

        # Details are only fetched when both the detail link and the pdf link exist.
        if link_id and inv_id:
            form = self.page.get_form('//div[@id="operation"]//form')
            # The fund id is whatever follows the first '-' of the pdf link id.
            form['idFonds'] = inv_id.split('-', 1)[-1]
            form['org.richfaces.ajax.component'] = form[link_id] = link_id
            page = self.page.browser.open(form['javax.faces.encodedURL'], data=dict(form)).page

            if 'hsbc.fr' in self.page.browser.BASEURL:
                # Special space for HSBC, does not contain any information related to performances.
                m = re.search(r'fundid=(\w+).+SH=(\w+)', CleanText('//complete', default='')(page.doc))
                if m:  # had to put full url to skip redirections.
                    page = page.browser.open('https://www.assetmanagement.hsbc.com/feedRequest?feed_data=gfcFundData&cod=FR&client=FCPE&fId=%s&SH=%s&lId=fr' % m.groups()).page

            elif not self.page.browser.history.is_here():
                url = page.get_invest_url()

                if empty(url):
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                # URLs used in browser.py to access investments performance history:
                if url.startswith('https://optimisermon.epargne-retraite-entreprises'):
                    # This URL can be used to access the BNP Wealth API to fetch investment performance and ISIN code
                    self.env['_link'] = url
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return
                elif (url.startswith('http://sggestion-ede.com/product') or
                    url.startswith('https://www.lyxorfunds.com/part') or
                    url.startswith('https://www.societegeneralegestion.fr') or
                    url.startswith('http://www.etoile-gestion.com/productsheet')):
                    # Keep the link, then continue below to look for a code.
                    self.env['_link'] = url

                # Try to fetch ISIN code from URL with re.match
                match = re.match(r'http://www.cpr-am.fr/fr/fonds_detail.php\?isin=([A-Z0-9]+)', url)
                match = match or re.match(r'http://www.cpr-am.fr/particuliers/product/view/([A-Z0-9]+)', url)
                if match:
                    self.env['code'] = match.group(1)
                    # A code that is not a valid ISIN is recorded as an AMF code.
                    if is_isin_valid(match.group(1)):
                        self.env['code_type'] = Investment.CODE_TYPE_ISIN
                    else:
                        self.env['code_type'] = Investment.CODE_TYPE_AMF
                    return

                # Try to fetch ISIN code from URL with re.search
                m = re.search(r'&ISIN=([^&]+)', url)
                m = m or re.search(r'&isin=([^&]+)', url)
                m = m or re.search(r'&codeIsin=([^&]+)', url)
                m = m or re.search(r'lyxorfunds\.com/part/([^/]+)', url)
                if m:
                    self.env['code'] = m.group(1)
                    if is_isin_valid(m.group(1)):
                        self.env['code_type'] = Investment.CODE_TYPE_ISIN
                    else:
                        self.env['code_type'] = Investment.CODE_TYPE_AMF
                    return

                useless_urls = (
                    # pdf... http://docfinder.is.bnpparibas-ip.com/api/files/040d05b3-1776-4991-aa49-f0cd8717dab8/1536
                    'http://docfinder.is.bnpparibas-ip.com/',
                    # The AXA website displays performance graphs but everything is calculated using JS scripts.
                    # There is an API but it only contains risk data and performances per year, not 1-3-5 years.
                    'https://epargne-salariale.axa-im.fr/fr/',
                    # Redirection to the Rothschild Gestion website, which doesn't exist anymore...
                    'https://www.rothschildgestion.com',
                    # URL to the Morningstar website does not contain any useful information
                    'http://doc.morningstar.com',
                )
                for useless_url in useless_urls:
                    if url.startswith(useless_url):
                        self.env['code'] = NotAvailable
                        self.env['code_type'] = NotAvailable
                        return

                # These two cookies are set before opening swisslife-am pages.
                if url.startswith('http://fr.swisslife-am.com/fr/'):
                    self.page.browser.session.cookies.set('location', 'fr')
                    self.page.browser.session.cookies.set('prof', 'undefined')
                try:
                    page = self.page.browser.open(url).page
                except HTTPNotFound:
                    # Some pages lead to a 404 so we must avoid unnecessary crash
                    self.logger.warning('URL %s was not found, investment details will be skipped.', url)

        # ``page`` is whatever was opened last above (or None when nothing was opened).
        if isinstance(page, CodePage):
            self.env['code'] = page.get_code()
            self.env['code_type'] = page.CODE_TYPE
            self.env['asset_category'] = page.get_asset_category()
        else:
            # The page is not handled and does not have a get_code method.
            self.env['code'] = NotAvailable
            self.env['code_type'] = NotAvailable
            self.env['asset_category'] = NotAvailable
Example #23
0
 def get_history_jid(self):
     """Return the second ``:``-separated part of the ``index:j_id…`` element id."""
     element_id = Attr('//*[starts-with(@id, "index:j_id")]', 'id')(self.doc)
     _prefix, jid = element_id.split(':')[:2]
     return jid
Example #24
0
 def get_iframe(self):
     """Return the ``src`` attribute of the ``iframePartenaire`` iframe."""
     iframes = self.doc.xpath('//iframe[@id="iframePartenaire"]')
     src_filter = Attr(None, 'src')
     return src_filter.filter(iframes)
Example #25
0
 def load_virtual(self, phonenumber):
     """Return one character of the ``onclick`` handler of the matching line.

     A line matches when the text after its last ``-`` (stripped) equals
     ``phonenumber``.  The character returned is the second one following
     the first ``(`` of the handler.  ``None`` is returned when no line
     matches.
     """
     for line in self.doc.xpath('//div[@class="infosLigne pointer"]'):
         shown_number = CleanText('.')(line).split("-")[-1].strip()
         if shown_number == phonenumber:
             handler = Attr('.', 'onclick')(line)
             after_paren = handler.split('(')[1]
             return after_paren[1]
     return None
Example #26
0
 def obj_label(self):
     """Return ``data-nomcontrat`` when set, otherwise the element's cleaned text."""
     contract_name = Attr('.', 'data-nomcontrat', default=None)(self)
     return contract_name or CleanText('.')(self)
Example #27
0
 def get_dropdown_menu(self, account_id):
     """Return the ``value`` of the first ``idCptSelect`` option naming the account.

     An option matches when ``account_id`` occurs in its text once spaces
     are removed.  ``None`` is returned when no option matches.
     """
     # Get the 'idCptSelect' in a drop-down menu that corresponds the current account
     options = self.doc.xpath('//select[@id="idCptSelect"]//option[@value]')
     return next(
         (Attr('.', 'value')(option)
          for option in options
          if account_id in CleanText('.', replace=[(' ', '')])(option)),
         None,
     )
Example #28
0
        class item(ItemElement):
            """Item element producing one ``Housing`` per listing teaser."""
            klass = Housing

            # "<type>:<reference>": type comes from the environment, reference
            # from the ``data-reference`` attribute of a span.
            obj_id = Format(
                '%s:%s', Env('type'),
                Attr('.//span[boolean(@data-reference)]', 'data-reference'))
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                """Return the house type whose slug appears as a path segment of the URL.

                The first ``QUERY_HOUSE_TYPES`` entry having a ``/<slug>/``
                occurrence in the listing URL wins; ``NotLoaded`` otherwise.
                """
                url = self.obj_url(self)
                for house_type, slugs in QUERY_HOUSE_TYPES.items():
                    for slug in slugs:
                        if ('/%s/' % slug) in url:
                            return house_type
                return NotLoaded

            # Defined once (an earlier identical assignment was immediately
            # overwritten by this one).
            obj_url = AbsoluteLink('.//h3[has-class("TeaserOffer-title")]/a')
            obj_title = CleanText('.//h3[has-class("TeaserOffer-title")]')
            # Area: leading number of the "surfaceDesBiens" paragraph text.
            obj_area = CleanDecimal(Regexp(CleanText(
                './/div[has-class("MiniData")]//p[@data-behat="surfaceDesBiens"]'
            ),
                                           r'(\d*\.*\d*) .*',
                                           default=NotAvailable),
                                    default=NotAvailable)
            obj_cost = CleanDecimal(
                './/strong[has-class("TeaserOffer-price-num")]',
                default=NotAvailable)
            obj_price_per_meter = PricePerMeterFilter()
            obj_currency = Currency(
                './/strong[has-class("TeaserOffer-price-num")]')
            obj_location = CleanText('.//p[has-class("TeaserOffer-loc")]')
            obj_text = CleanText('.//p[has-class("TeaserOffer-description")]')

            def obj_photos(self):
                """Return the teaser image as a list of at most one ``HousingPhoto``.

                An empty ``src`` or one starting with ``/`` gives an empty list.
                """
                url = CleanText(
                    Attr('.//a[has-class("TeaserOffer-ill")]/img',
                         'src'))(self)
                # If the used photo is a default no photo, the src is on the same domain.
                # An empty src is skipped as well instead of failing on indexing.
                if not url or url.startswith('/'):
                    return []
                return [HousingPhoto(url)]

            # NOTE: evaluated once, when this class body is executed, not per item.
            obj_date = datetime.date.today()

            def obj_utilities(self):
                """Return INCLUDED when the price text mentions "charges comprises", else EXCLUDED."""
                price = CleanText(
                    './/strong[has-class("TeaserOffer-price-num")]')(self)
                if "charges comprises" in price.lower():
                    return UTILITIES.INCLUDED
                return UTILITIES.EXCLUDED

            obj_rooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbPiecesDesBiens"]',
                default=NotLoaded)
            obj_bedrooms = CleanDecimal(
                './/div[has-class("MiniData")]//p[@data-behat="nbChambresDesBiens"]',
                default=NotLoaded)

            def obj_details(self):
                """Return a dict with the availability date and the price mentions.

                ``dispo`` is parsed from the ``data-dispo`` attribute, with
                today's ISO date used as the default value; ``priceMentions``
                is the text of the price-mentions span.
                """
                return {
                    "dispo":
                    Date(
                        Attr('.//span[boolean(@data-dispo)]',
                             'data-dispo',
                             default=datetime.date.today().isoformat()))(self),
                    "priceMentions":
                    CleanText(
                        './/span[has-class("TeaserOffer-price-mentions")]')(
                            self)
                }
Example #29
0
        class item(ItemElement):
            # Each matched element produces one BaseObject.
            klass = BaseObject

            # The object id is the element's own ``data-trend-name`` attribute.
            obj_id = Attr('.', 'data-trend-name')
Example #30
0
    class get_housing(ItemElement):
        """Item element producing a single ``Housing`` from an offer detail page."""
        klass = Housing

        # "<type>:<reference>": type comes from the environment, reference
        # from the ``data-property-reference`` attribute of a div.
        obj_id = Format(
            '%s:%s', Env('type'),
            Attr('//div[boolean(@data-property-reference)]',
                 'data-property-reference'))
        obj_advert_type = ADVERT_TYPES.PROFESSIONAL

        def obj_type(self):
            """Map the ``type`` environment value to a ``POSTS_TYPES`` member.

            ``location`` gives FURNISHED_RENT when the page URL contains
            ``appartement-meuble`` and RENT otherwise; ``achat`` gives SALE;
            anything else gives ``NotAvailable``.
            """
            type = Env('type')(self)
            if type == 'location':
                if 'appartement-meuble' in self.page.url:
                    return POSTS_TYPES.FURNISHED_RENT
                else:
                    return POSTS_TYPES.RENT
            elif type == 'achat':
                return POSTS_TYPES.SALE
            else:
                return NotAvailable

        def obj_url(self):
            """Return the URL of the current page."""
            return self.page.url

        def obj_house_type(self):
            """Return the house type whose slug appears as a path segment of the page URL.

            The first ``QUERY_HOUSE_TYPES`` entry having a ``/<slug>/``
            occurrence in the URL wins; ``NotAvailable`` otherwise.
            """
            url = self.obj_url()
            for house_type, types in QUERY_HOUSE_TYPES.items():
                for type in types:
                    if ('/%s/' % type) in url:
                        return house_type
            return NotAvailable

        obj_title = CleanText('//h1[has-class("OfferTop-title")]')
        # Area: leading number of the first MiniData item.
        obj_area = CleanDecimal(Regexp(CleanText(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'),
                                       r'(\d*\.*\d*) .*',
                                       default=NotAvailable),
                                default=NotAvailable)
        obj_cost = CleanDecimal('//span[has-class("OfferTop-price")]',
                                default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()
        obj_currency = Currency('//span[has-class("OfferTop-price")]')
        # "<address> - <locality>"
        obj_location = Format('%s - %s',
                              CleanText('//p[@data-behat="adresseBien"]'),
                              CleanText('//p[has-class("OfferTop-loc")]'))
        obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
        # Phone number: whatever follows "tel:" in the contact button link.
        obj_phone = Regexp(Link('//a[has-class("OfferContact-btn--tel")]'),
                           r'tel:(.*)')

        def obj_photos(self):
            """Return one ``HousingPhoto`` per slider image, with 640/480 replaced by 800/600."""
            photos = []
            for photo in self.xpath('//div[has-class("OfferSlider")]//img'):
                photo_url = Attr('.', 'src')(photo)
                photo_url = photo_url.replace('640/480', '800/600')
                photos.append(HousingPhoto(photo_url))
            return photos

        # NOTE: evaluated once, when this class body is executed, not per item.
        obj_date = datetime.date.today()

        def obj_utilities(self):
            """Return INCLUDED when the price text mentions "charges comprises", else EXCLUDED."""
            price = CleanText('//p[has-class("OfferTop-price")]')(self)
            if "charges comprises" in price.lower():
                return UTILITIES.INCLUDED
            else:
                return UTILITIES.EXCLUDED

        # Rooms and bedrooms are read from the second and third MiniData items.
        obj_rooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]',
            default=NotAvailable)
        obj_bedrooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
            default=NotAvailable)

        def obj_DPE(self):
            """Derive the energy class from the consumption number in the DPE image URL.

            The number is the first path segment of a ``dpe.foncia.net``
            image ``src``.  Returns ``NotAvailable`` when no such image is
            found or the URL does not match.
            """
            try:
                electric_consumption = CleanDecimal(
                    Regexp(
                        Attr('//div[has-class("OfferDetails-content")]//img',
                             'src'),
                        r'https://dpe.foncia.net\/(\d+)\/.*'))(self)
            except (RegexpError, XPathNotFound):
                electric_consumption = None

            DPE = ""
            if electric_consumption is not None:
                # Upper bounds per class: A 50, B 90, C 150, D 230, E 330, F 450, G above.
                if electric_consumption <= 50:
                    DPE = "A"
                elif 50 < electric_consumption <= 90:
                    DPE = "B"
                elif 90 < electric_consumption <= 150:
                    DPE = "C"
                elif 150 < electric_consumption <= 230:
                    DPE = "D"
                elif 230 < electric_consumption <= 330:
                    DPE = "E"
                elif 330 < electric_consumption <= 450:
                    DPE = "F"
                else:
                    DPE = "G"
                return getattr(ENERGY_CLASS, DPE, NotAvailable)
            return NotAvailable

        def obj_details(self):
            """Collect miscellaneous offer details into a dict.

            Keys: ``dispo``, ``priceMentions``, ``agency`` (each only when the
            extracted value is not ``None``), one nested dict per titled
            details column, and ``electric_consumption`` when the DPE image
            URL can be parsed.
            """
            details = {}

            # Availability date; today's ISO date is the Regexp default.
            dispo = Date(
                Regexp(CleanText('//p[has-class("OfferTop-dispo")]'),
                       r'.* (\d\d\/\d\d\/\d\d\d\d)',
                       default=datetime.date.today().isoformat()))(self)
            if dispo is not None:
                details["dispo"] = dispo

            priceMentions = CleanText('//p[has-class("OfferTop-mentions")]',
                                      default=None)(self)
            if priceMentions is not None:
                details["priceMentions"] = priceMentions

            agency = CleanText('//p[has-class("OfferContact-address")]',
                               default=None)(self)
            if agency is not None:
                details["agency"] = agency

            for item in self.xpath(
                    '//div[has-class("OfferDetails-columnize")]/div'):
                category = CleanText(
                    './h3[has-class("OfferDetails-title--2")]',
                    default=None)(item)
                # Columns without a title are skipped.
                if not category:
                    continue

                details[category] = {}

                # "List--data" entries are title/value pairs.
                for detail_item in item.xpath(
                        './/ul[has-class("List--data")]/li'):
                    detail_title = CleanText(
                        './/span[has-class("List-data")]')(detail_item)
                    detail_value = CleanText('.//*[has-class("List-value")]')(
                        detail_item)
                    details[category][detail_title] = detail_value

                # "List--bullet" entries are recorded as flags set to True.
                for detail_item in item.xpath(
                        './/ul[has-class("List--bullet")]/li'):
                    detail_title = CleanText('.')(detail_item)
                    details[category][detail_title] = True

            # Same extraction as in obj_DPE; a missing image or a
            # non-matching URL simply leaves the key out.
            try:
                electric_consumption = CleanDecimal(
                    Regexp(
                        Attr('//div[has-class("OfferDetails-content")]//img',
                             'src'),
                        r'https://dpe.foncia.net\/(\d+)\/.*'))(self)
                details["electric_consumption"] = (
                    '{} kWhEP/m².an'.format(electric_consumption))
            except (RegexpError, XPathNotFound):
                pass

            return details
Example #31
0
 def get_origin_account_id(self, origin):
     """Return the first ``data-acct-number`` that is contained in ``origin.id``.

     Only divs under ``internalAccounts`` carrying the attribute are
     examined.  Raises ``IndexError`` when none matches.
     """
     matching_numbers = []
     for div in self.doc.xpath('//div[@id="internalAccounts"]//div[@data-acct-number]'):
         acct_number = Attr('.', 'data-acct-number')(div)
         if acct_number in origin.id:
             matching_numbers.append(acct_number)
     return matching_numbers[0]
Example #32
0
 def obj_thumbnail(self):
     """Build a ``Thumbnail`` from the background image of the recipe span.

     The ``background-image:url(`` prefix is removed from the ``style``
     attribute and any trailing ``)`` / ``;`` characters are stripped.
     """
     css = Attr('.//a[has-class("bu_cuisine_recette_img")]/span', 'style')(self)
     image_url = css.replace("background-image:url(", "")
     image_url = image_url.rstrip(");")
     return Thumbnail(image_url)
Example #33
0
 def able_to_transfer(self, origin):
     """Return the internal-account divs matching ``origin`` that are not disabled.

     A div matches when its ``data-acct-number`` is contained in
     ``origin.id``; it is kept when ``disabled`` does not occur in its
     ``class`` attribute.  An empty list means no usable account.
     """
     usable = []
     for div in self.doc.xpath('//div[@id="internalAccounts"]//div[@data-acct-number]'):
         if Attr('.', 'data-acct-number')(div) not in origin.id:
             continue
         if 'disabled' not in div.attrib['class']:
             usable.append(div)
     return usable
Example #34
0
        class item(ItemElement):
            offer_details_wrapper = (
                './div/div/div[has-class("offer-details-wrapper")]')
            klass = Housing

            obj_id = Format(
                '%s-%s', Regexp(Env('type'), '(.*)-.*'),
                CleanText('./@id', replace=[('header-offer-', '')]))
            obj_type = Env('query_type')
            obj_advert_type = ADVERT_TYPES.PROFESSIONAL

            def obj_house_type(self):
                house_type = CleanText('.//p[has-class("offer-type")]')(
                    self).lower()
                if house_type == "appartement":
                    return HOUSE_TYPES.APART
                elif house_type == "maison":
                    return HOUSE_TYPES.HOUSE
                elif house_type == "terrain":
                    return HOUSE_TYPES.LAND
                elif house_type == "parking":
                    return HOUSE_TYPES.PARKING
                else:
                    return HOUSE_TYPES.OTHER

            # Title: ``title`` attribute of the link inside the offer-type
            # paragraph.
            obj_title = Attr(
                offer_details_wrapper + '/div/div/p[@class="offer-type"]/a',
                'title')
            # URL: built from the element's ``id`` attribute, with
            # 'header-offer-' replaced by 'detail-location-'.
            obj_url = Format(
                "http://www.logic-immo.com/%s.htm",
                CleanText('./@id',
                          replace=[('header-offer-', 'detail-location-')]))
            # Area: decimal read from the "offer-area-number" span, or
            # NotAvailable.
            obj_area = CleanDecimal(
                (offer_details_wrapper +
                 '/div/div/div[has-class("offer-details-second")]' +
                 '/div/h3[has-class("offer-attributes")]/span' +
                 '/span[has-class("offer-area-number")]'),
                default=NotAvailable)
            # Rooms: decimal read from the "offer-rooms-number" span, or
            # NotAvailable.
            obj_rooms = CleanDecimal(
                (offer_details_wrapper +
                 '/div/div/div[has-class("offer-details-second")]' +
                 '/div/h3[has-class("offer-attributes")]' +
                 '/span[has-class("offer-rooms")]' +
                 '/span[has-class("offer-rooms-number")]'),
                default=NotAvailable)
            obj_price_per_meter = PricePerMeterFilter()
            # Cost: the part of the price span that precedes " €", " $" or
            # " £"; every stage of the chain falls back to NotAvailable.
            obj_cost = CleanDecimal(
                Regexp(
                    CleanText((offer_details_wrapper +
                               '/div/div/p[@class="offer-price"]/span'),
                              default=NotAvailable),
                    '(.*) [%s%s%s]' % (u'€', u'$', u'£'),
                    default=NotAvailable),
                default=NotAvailable)
            # Currency is extracted from the same price span as the cost.
            obj_currency = Currency(
                offer_details_wrapper +
                '/div/div/p[has-class("offer-price")]/span')
            obj_utilities = UTILITIES.UNKNOWN
            # Date: the NN/NN/NNNN-shaped substring of the "offer-update"
            # paragraph.  The pattern is a raw string so that ``\d`` is not
            # parsed as an (invalid) string escape sequence.
            # NOTE(review): no ``dayfirst`` argument is passed to Date here;
            # confirm the default parses this site's dates as intended.
            obj_date = Date(
                Regexp(
                    CleanText(
                        './div/div/div[has-class("offer-picture-more")]/div/p[has-class("offer-update")]'
                    ), r".*(\d{2}/\d{2}/\d{4}).*"))
            # Text: content of the "offer-description" span.
            obj_text = CleanText(
                offer_details_wrapper +
                '/div/div/div/p[has-class("offer-description")]/span')
            # Location: content of the "offer-places-block" div.
            obj_location = CleanText(offer_details_wrapper +
                                     '//div[has-class("offer-places-block")]')

            def obj_photos(self):
                """Return the offer's photo as a list of HousingPhoto.

                The list holds a single entry built from the ``src`` of the
                image inside the "offer-picture" div, or is empty when that
                attribute value is falsy.
                """
                thumbnail_src = Attr('.//div[has-class("offer-picture")]//img',
                                     'src')(self)
                if not thumbnail_src:
                    return []
                # Swap the thumbnail size marker for the larger one, then
                # resolve against the page URL so the result is absolute.
                enlarged_src = thumbnail_src.replace('400x267', '800x600')
                return [HousingPhoto(urljoin(self.page.url, enlarged_src))]

            def obj_details(self):
                """Collect extra details about the offer.

                Returns a dict that holds an "Honoraires" entry when the
                "offer-agency-fees" paragraph yields a non-empty text, and is
                empty otherwise.
                """
                details = {}
                honoraires = CleanText(
                    (self.offer_details_wrapper +
                     '/div/div/p[@class="offer-agency-fees"]'),
                    default=None)(self)
                if honoraires:
                    # The value is the second ':'-separated field (presumably
                    # "label: amount" - TODO confirm).  When there is no
                    # colon at all, keep the whole text instead of raising
                    # IndexError.
                    fields = honoraires.split(":")
                    fee = fields[1] if len(fields) > 1 else fields[0]
                    details["Honoraires"] = "{} (TTC, en sus)".format(
                        fee.strip())
                return details
Example #35
0
    class get_housing(ItemElement):
        """Item element that fills a Housing object from an offer page."""

        klass = Housing

        # Identifier: "<type>:<reference>", where the type comes from the
        # environment and the reference from the first div carrying a
        # ``data-property-reference`` attribute.
        obj_id = Format(
            '%s:%s', Env('type'),
            Attr('//div[boolean(@data-property-reference)]',
                 'data-property-reference'))

        def obj_url(self):
            """Return the URL of the page being parsed."""
            return self.page.url

        obj_title = CleanText('//h1[has-class("OfferTop-title")]')
        # Area: leading number of the first "MiniData-item" paragraph.
        obj_area = CleanDecimal(
            Regexp(
                CleanText(
                    '//div[has-class("MiniData")]//p[has-class("MiniData-item")][1]'
                ), r'(\d*\.*\d*) .*'))
        # Cost and currency are both parsed out of the "OfferTop-price"
        # paragraph: the leading digits/spaces/dots, then the single
        # character that follows them.
        obj_cost = CleanDecimal(
            Regexp(CleanText('//p[has-class("OfferTop-price")]'),
                   r'([\d \.]*) .*'))
        obj_price_per_meter = PricePerMeterFilter()
        obj_currency = Regexp(CleanText('//p[has-class("OfferTop-price")]'),
                              r'[\d \.]* (.) .*')
        obj_location = Format('%s - %s',
                              CleanText('//p[@data-behat="adresseBien"]'),
                              CleanText('//p[has-class("OfferTop-loc")]'))
        obj_text = CleanText('//div[has-class("OfferDetails-content")]/p[1]')
        # Phone: whatever follows "tel:" in the contact button's link.
        obj_phone = Regexp(Link('//a[has-class("OfferContact-btn--tel")]'),
                           r'tel:(.*)')

        def obj_photos(self):
            """Return the ``src`` of every slider thumbnail image.

            NOTE(review): this returns plain URL strings, whereas other
            ``obj_photos`` implementations in this file wrap each URL in
            ``HousingPhoto`` - confirm which one consumers expect.
            """
            photos = []
            for photo in self.xpath(
                    '//li[has-class("OfferSlider-thumbs-item")]/img'):
                photos.append(Attr('.', 'src')(photo))
            return photos

        def obj_date(self):
            """Return today's date.

            Implemented as a method so the date is computed each time an
            item is parsed; a class-level ``datetime.date.today()`` would be
            evaluated only once, when the class body is executed.
            """
            return datetime.date.today()

        def obj_utilities(self):
            """Tell whether the displayed price includes charges.

            Returns UTILITIES.INCLUDED when the price text contains
            "charges comprises" (case-insensitive), UTILITIES.EXCLUDED
            otherwise.
            """
            price = CleanText('//p[has-class("OfferTop-price")]')(self)
            if "charges comprises" in price.lower():
                return UTILITIES.INCLUDED
            return UTILITIES.EXCLUDED

        # Rooms and bedrooms: second and third "MiniData-item" paragraphs;
        # only bedrooms has a NotAvailable fallback.
        obj_rooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][2]')
        obj_bedrooms = CleanDecimal(
            '//div[has-class("MiniData")]//p[has-class("MiniData-item")][3]',
            default=NotAvailable)

        def obj_details(self):
            """Build the free-form details dict of the offer.

            The dict contains:

            * "dispo", "priceMentions" and "agency" entries,
            * one nested dict per "OfferDetails-columnize" section, keyed by
              the section title, mapping each data item to its value and
              each bullet item to True,
            * "electric_consumption" and "DPE", both NotAvailable when no
              consumption figure is found in the image URL.
            """
            details = {
                "dispo":
                Date(
                    # Falls back to today's date when no dd/mm/yyyy date is
                    # found in the "OfferTop-dispo" paragraph.
                    Regexp(CleanText('//p[has-class("OfferTop-dispo")]'),
                           r'.* (\d\d\/\d\d\/\d\d\d\d)',
                           default=datetime.date.today().isoformat()))(self),
                "priceMentions":
                CleanText('//p[has-class("OfferTop-mentions")]')(self),
                "agency":
                CleanText('//p[has-class("OfferContact-address")]')(self)
            }
            for item in self.xpath(
                    '//div[has-class("OfferDetails-columnize")]/div'):
                category = CleanText(
                    './h3[has-class("OfferDetails-title--2")]')(item)
                details[category] = {}
                for detail_item in item.xpath(
                        './/ul[has-class("List--data")]/li'):
                    detail_title = CleanText(
                        './/span[has-class("List-data")]')(detail_item)
                    detail_value = CleanText('.//*[has-class("List-value")]')(
                        detail_item)
                    details[category][detail_title] = detail_value
                for detail_item in item.xpath(
                        './/ul[has-class("List--bullet")]/li'):
                    detail_title = CleanText('.')(detail_item)
                    details[category][detail_title] = True

            # The consumption figure is the first path component of the DPE
            # image URL.  Evaluate the Regexp against this element first and
            # only then convert the captured digits: wrapping the evaluated
            # result in CleanDecimal(...) without calling it would yield a
            # filter object instead of a number.
            consumption_digits = Regexp(
                Attr('//div[has-class("OfferDetails-content")]//img', 'src'),
                r'https://dpe.foncia.net\/(\d+)\/.*',
                default=None)(self)
            electric_consumption = None
            if consumption_digits is not None:
                electric_consumption = CleanDecimal().filter(
                    consumption_digits)

            if electric_consumption is not None:
                details["electric_consumption"] = '{} kWhEP/m².an'.format(
                    electric_consumption)
                # Inclusive upper bound of each class, in ascending order;
                # anything above the last bound is class G.
                dpe_upper_bounds = (
                    (50, "A"),
                    (90, "B"),
                    (150, "C"),
                    (230, "D"),
                    (330, "E"),
                    (450, "F"),
                )
                details["DPE"] = "G"
                for upper_bound, dpe_class in dpe_upper_bounds:
                    if electric_consumption <= upper_bound:
                        details["DPE"] = dpe_class
                        break
            else:
                details["electric_consumption"] = NotAvailable
                details["DPE"] = NotAvailable
            return details
Example #36
0
        class item(ItemElement):
            """Item element that fills a Subscription from one matched node."""

            klass = Subscription

            # TODO: Handle energy type
            # Label: the node's content passed through CleanHTML, then
            # CleanText.
            obj_label = CleanText(CleanHTML('.'))
            # Identifier: ``value`` attribute of the node's direct <input>
            # child.
            obj_id = Attr('./input', 'value')
Example #37
0
    def parse(self, el):
        """Fill ``self.env`` with the unit value, date and code of a fund.

        Sets four keys in ``self.env``:

        * ``unitvalue`` and ``vdate``, read from the spans of the "label"
          table cell, or NotAvailable when no span matches;
        * ``code`` and ``code_type``, obtained from the fund detail page
          when one can be reached, or NotAvailable otherwise.

        :param el: element being parsed (not used directly in this method).
        """
        # Trying to find vdate and unitvalue
        unitvalue, vdate = None, None
        for span in TableCell('label')(self)[0].xpath('.//span'):
            if unitvalue is None:
                unitvalue = Regexp(CleanText('.'), r'^([\d,]+)$', default=None)(span)
            if vdate is None:
                # A date sitting in a maturity block is not a valuation date.
                vdate = None if any(x in CleanText('./parent::div')(span) for x in [u"échéance", "Maturity"]) else \
                        Regexp(CleanText('.'), r'^([\d\/]+)$', default=None)(span)
        self.env['unitvalue'] = MyDecimal().filter(unitvalue) if unitvalue else NotAvailable
        self.env['vdate'] = Date(dayfirst=True).filter(vdate) if vdate else NotAvailable

        page = None
        link_id = Attr(u'.//a[contains(@title, "détail du fonds")]', 'id', default=None)(self)
        inv_id = Attr('.//a[contains(@id, "linkpdf")]', 'id', default=None)(self)

        if link_id and inv_id:
            # Submit the operation form as if the fund detail link had been
            # clicked.
            form = self.page.get_form('//div[@id="operation"]//form')
            form['idFonds'] = inv_id.split('-', 1)[-1]
            form['org.richfaces.ajax.component'] = form[link_id] = link_id
            page = self.page.browser.open(form['javax.faces.encodedURL'], data=dict(form)).page

            if "hsbc.fr" in self.page.browser.BASEURL: # special space for HSBC
                m = re.search(r'fundid=(\w+).+SH=(\w+)', CleanText('//complete', default="")(page.doc))

                if m: # had to put full url to skip redirections.
                    page = page.browser.open('https://www.assetmanagement.hsbc.com/feedRequest?feed_data=gfcFundData&cod=FR&client=FCPE&fId=%s&SH=%s&lId=fr' % m.groups()).page
            elif "consulteroperations" not in self.page.browser.url: # not on history
                # The alternation is grouped on purpose: written as a bare
                # ``...',true|false\).*`` it splits the WHOLE pattern into
                # two alternatives, the second of which has no capture group
                # for the URL.
                url = Regexp(CleanText('//complete'), r"openUrlFichesFonds\('(.*?)',(?:true|false)", default=NotAvailable)(page.doc)

                if url is NotAvailable:
                    # redirection to a useless graphplot page with url like /portal/salarie-sg/fichefonds?idFonds=XXX&source=/portal/salarie-sg/monepargne/mesavoirs
                    # or on bnp, look for plot display function in a script
                    assert CleanText('//redirect/@url')(page.doc) or CleanText('//script[contains(text(), "afficherGraphique")]')(page.doc)
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                useless_urls = (
                    # pdf... http://docfinder.is.bnpparibas-ip.com/api/files/040d05b3-1776-4991-aa49-f0cd8717dab8/1536
                    'http://docfinder.is.bnpparibas-ip.com/',
                    # Redirection to a useless page with url like "https://epargne-salariale.axa-im.fr/fr/"
                    'https://epargne-salariale.axa-im.fr/fr/',
                )

                # str.startswith accepts a tuple of prefixes.
                if url.startswith(useless_urls):
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                # For these URL shapes the ISIN is part of the URL itself,
                # so there is no need to open the page.
                match = re.match(r'http://www.cpr-am.fr/fr/fonds_detail.php\?isin=([A-Z0-9]+)', url)
                match = match or re.match(r'http://www.cpr-am.fr/particuliers/product/view/([A-Z0-9]+)', url)
                if match:
                    self.env['code'] = match.group(1)
                    self.env['code_type'] = Investment.CODE_TYPE_ISIN
                    return

                if url.startswith('http://fr.swisslife-am.com/fr/'):
                    self.page.browser.session.cookies.set('location', 'fr')
                    self.page.browser.session.cookies.set('prof', 'undefined')

                page = self.page.browser.open(url).page

        try:
            self.env['code'] = page.get_code()
            self.env['code_type'] = page.CODE_TYPE
        # Handle page is None and page has not get_code method
        except AttributeError:
            self.env['code'] = NotAvailable
            self.env['code_type'] = NotAvailable
Example #38
0
 def get_typelist(self):
     """Return a one-entry dict parsed from the accounts script.

     Opens the script whose ``src`` contains "comptes/scripts" and looks
     for the text following "synthesecomptes"; the first captured group is
     used as the key and the second as the value.

     Raises AttributeError when the pattern is not found, because the
     match object is then None.
     """
     url = Attr(None, 'src').filter(
         self.doc.xpath('//script[contains(@src, "comptes/scripts")]'))
     # Raw string: ``\w`` must reach the regex engine untouched rather than
     # being parsed as an (invalid) string escape sequence.
     # NOTE(review): if ``.content`` is bytes, a str pattern cannot be used
     # against it on Python 3 - confirm what the browser returns here.
     m = re.search(r'synthesecomptes[^\w]+([^:]+)[^\w]+([^"]+)',
                   self.browser.open(url).content)
     return {m.group(1): m.group(2)}
Example #39
0
 def obj__link(self):
     """Return the link of the account, or NotAvailable when it has none.

     As a side effect, when the browser has no ``webid`` yet, it is set
     from the 32-character path segment found in the link.  Links starting
     with '/budget' get 'mouvements' appended.
     """
     link = Attr('.//a[@class="account--name"]', 'href', default=NotAvailable)(self)
     if link is NotAvailable:
         # The Attr default is NotAvailable; it must not be fed to
         # re.search() or .startswith() below.
         return NotAvailable
     if not self.page.browser.webid:
         # Raw string so the backslashes are not parsed as string escapes.
         self.page.browser.webid = re.search(r'\/([^\/|?|$]{32})(\/|\?|$)', link).group(1)
     return '%smouvements' % link if link.startswith('/budget') else link
Example #40
0
        class item(Transaction.TransactionElement):
            """Transaction row completed with data from its detail page.

            The detail page is loaded asynchronously under the name
            'details', from the ``href`` attribute of the row.
            """

            load_details = Attr('.', 'href', default=None) & AsyncLoad

            def obj_type(self):
                """Return the transaction type matching the detail page.

                The "Nature de l'opération" cell of the detail page is
                matched against ``Transaction.PATTERNS``; the type of the
                first matching pattern is returned, or
                ``Transaction.TYPE_UNKNOWN`` when the cell is empty or
                nothing matches.
                """
                nature = Async(
                    'details',
                    CleanText(
                        u'//td[contains(text(), "Nature de l\'opération")]/following-sibling::*[1]'
                    ))(self)
                if not nature:
                    return Transaction.TYPE_UNKNOWN
                for pattern, _type in Transaction.PATTERNS:
                    if pattern.match(nature):
                        return _type
                return Transaction.TYPE_UNKNOWN

            def condition(self):
                """Tell whether this row should be parsed.

                The result is truthy only when the parent has a 'date'
                column, the row has at least three direct ``td`` children
                and a ``class`` attribute that does not contain 'tableTr'.
                """
                return self.parent.get_colnum('date') is not None and \
                       len(self.el.findall('td')) >= 3 and \
                       self.el.get('class') and \
                       'tableTr' not in self.el.get('class')

            def validate(self, obj):
                """Complete ``obj`` from the detail page; always return True.

                * 'RELEVE CB' rows are typed as card summaries and flagged
                  as deleted.
                * The raw text and label are replaced by, or extended
                  with, the label found on the detail page.
                * When the row has no date, the date, rdate, vdate and
                  amount are read from the detail page.
                """
                if obj.category == 'RELEVE CB':
                    obj.type = Transaction.TYPE_CARD_SUMMARY
                    obj.deleted = True

                raw = Async(
                    'details',
                    CleanText(
                        u'//td[contains(text(), "Libellé")]/following-sibling::*[1]|//td[contains(text(), "Nom du donneur")]/following-sibling::*[1]',
                        default=obj.raw))(self)
                if raw:
                    # Replace the label when one text contains the other (or
                    # the row label is a single word); otherwise keep both.
                    if obj.raw in raw or raw in obj.raw or ' ' not in obj.raw:
                        obj.raw = raw
                        obj.label = raw
                    else:
                        obj.label = '%s %s' % (obj.raw, raw)
                        obj.raw = '%s %s' % (obj.raw, raw)
                if not obj.date:
                    obj.date = Async(
                        'details',
                        Date(CleanText(
                            u'//td[contains(text(), "Date de l\'opération")]/following-sibling::*[1]',
                            default=u''),
                             default=NotAvailable))(self)
                    obj.rdate = obj.date
                    obj.vdate = Async(
                        'details',
                        Date(CleanText(
                            u'//td[contains(text(), "Date de valeur")]/following-sibling::*[1]',
                            default=u''),
                             default=NotAvailable))(self)
                    obj.amount = Async(
                        'details',
                        CleanDecimal(
                            u'//td[contains(text(), "Montant")]/following-sibling::*[1]',
                            replace_dots=True,
                            default=NotAvailable))(self)
                # ugly hack to fix broken html
                if not obj.amount:
                    obj.amount = Async(
                        'details',
                        CleanDecimal(
                            u'//td[contains(text(), "Montant")]/following-sibling::*[1]',
                            replace_dots=True,
                            default=NotAvailable))(self)
                return True
Example #41
0
 def history_tabs_urls(self):
     """Return the ``href`` of every link in a list labelled "Débit le"."""
     urls = []
     for tab in self.doc.xpath('//ul//a[contains(text(), "Débit le")]'):
         urls.append(Attr('.', 'href')(tab))
     return urls
Example #42
0
 def get_login(self, phonenumber):
     """Return the ``login`` attribute of the block showing *phonenumber*.

     The block is the first div having a child div whose text contains the
     phone number; IndexError is raised when there is no such div.
     """
     login_of = Attr('.', 'login')
     candidates = self.doc.xpath(
         '//div[div[contains(text(), $phone)]]', phone=phonenumber)
     return login_of(candidates[0])
Example #43
0
 def has_next(self):
     """Tell whether a page follows the current one.

     Returns the falsy value read for the last or the current page number
     when either is empty; otherwise returns whether the last page number
     is greater than the current one.
     """
     current = Attr('//input[@id="numPage"]', 'value', default='')(self.doc)
     end = CleanText('//td[@id="numPageBloc"]/b[@class="contenu3-lien"]', replace=[('/', '')])(self.doc)
     if not end:
         return end
     if not current:
         return current
     return int(end) > int(current)
Example #44
0
 def get_number(self):
     """Return the ``data-main-video`` attribute of the player div."""
     main_video = Attr('//div[@id="player"]', 'data-main-video')
     return main_video(self.doc)