Exemple #1
0
            def parse(self, el):
                """Extract gauge data from the escaped HTML fragment carried by ``el``.

                The fragment is captured from ``el.text`` by ``self.extract``
                (named group ``html``), unescaped, parsed with lxml and used to
                fill ``self.env``. ``name``, ``object``, ``id`` and ``alarm`` are
                set on every successful call; ``datetime``, ``levelvalue``,
                ``flowvalue`` and ``forecast`` are only set when the matching
                table row is found.
                """
                raw = self.extract.match(el.text).group("html")
                # Undo the escaping of the embedded fragment:
                # \" -> ", \n dropped, \/ -> /
                raw = raw.replace('\\"', '"').replace('\\n', '').replace('\\/', '/')
                parsed = lxml.html.fromstring(raw)

                self.env['name'] = CleanText(
                    './/span[@class="popUpTitleBold"]')(parsed)
                self.env['object'] = CleanText(
                    './/span[@class="popUpTitleNormal"]')(parsed).strip(' /')
                url = Attr('.//div[@class="popUpMsDiagramm"]/img',
                           'src')(parsed)
                # The id is the second underscore-separated part of the image URL.
                self.env['id'] = url.split('_')[1]

                for tr in parsed.xpath('.//tr'):
                    cells = tr.xpath('.//td')
                    if not cells:
                        continue
                    # lxml reports ``text`` as None when a cell starts with a
                    # child element; treat that as an empty label instead of
                    # failing on the ``in`` test.
                    label = cells[0].text or ''
                    if len(cells) == 1 and "Datum" in label:
                        # Keep the second and third whitespace-separated tokens.
                        tokens = label.split()[1:3]
                        self.env['datetime'] = "%s %s" % (tokens[0], tokens[1])
                    elif len(cells) == 2:
                        if "Wasserstand" in label:
                            self.env['levelvalue'] = cells[1].text.split()[0]
                        elif "Durchfluss" in label:
                            self.env['flowvalue'] = cells[1].text.split()[0]
                        elif "Tendenz" in label:
                            try:
                                # Only the file name of the image is kept.
                                self.env['forecast'] = Attr('img', 'src')(
                                    cells[1]).split("/")[-1]
                            except ParseError:
                                self.env['forecast'] = None
                # TODO
                self.env['alarm'] = None
Exemple #2
0
            def parse(self, el):
                """Fill ``self.env`` from the escaped HTML fragment found in ``el.text``.

                ``self.extract`` captures the fragment (named group ``html``); it is
                unescaped and parsed with lxml. ``name``, ``object``, ``id`` and
                ``alarm`` are set on every successful call, while ``datetime``,
                ``levelvalue``, ``flowvalue`` and ``forecast`` depend on the
                corresponding table row being present.
                """
                raw = self.extract.match(el.text).group("html")
                # Undo the escaping of the embedded fragment: \" -> ", \n dropped, \/ -> /
                raw = raw.replace('\\"', '"').replace('\\n', '').replace('\\/', '/')
                parsed = lxml.html.fromstring(raw)

                self.env['name'] = CleanText('.//span[@class="popUpTitleBold"]')(parsed)
                self.env['object'] = CleanText('.//span[@class="popUpTitleNormal"]')(parsed).strip(' /')
                url = Attr('.//div[@class="popUpMsDiagramm"]/img', 'src')(parsed)
                # The id is the second underscore-separated part of the image URL.
                self.env['id'] = url.split('_')[1]

                for tr in parsed.xpath('.//tr'):
                    cells = tr.xpath('.//td')
                    if not cells:
                        continue
                    # ``text`` is None when the cell starts with a child element;
                    # fall back to an empty label so the ``in`` tests cannot raise.
                    label = cells[0].text or ''
                    if len(cells) == 1 and "Datum" in label:
                        # Keep the second and third whitespace-separated tokens.
                        tokens = label.split()[1:3]
                        self.env['datetime'] = "%s %s" % (tokens[0], tokens[1])
                    elif len(cells) == 2:
                        if "Wasserstand" in label:
                            self.env['levelvalue'] = cells[1].text.split()[0]
                        elif "Durchfluss" in label:
                            self.env['flowvalue'] = cells[1].text.split()[0]
                        elif "Tendenz" in label:
                            try:
                                # Only the file name of the image is kept.
                                self.env['forecast'] = Attr('img', 'src')(cells[1]).split("/")[-1]
                            except ParseError:
                                self.env['forecast'] = None
                # TODO
                self.env['alarm'] = None
Exemple #3
0
    def get_history_jid(self):
        """Return the second colon-separated part of the ``index:j_id...`` span id.

        Returns ``None`` when ``self.is_asv`` is set, without looking at the
        document.
        """
        if not self.is_asv:
            span_id = Attr('//span[starts-with(@id, "index:j_id")]', 'id')(self.doc)
            return span_id.split(':')[1]

        # Assurance Vie, we do not support this kind of account.
        return None
Exemple #4
0
    def get_history_jid(self):
        """Return the second colon-separated part of the ``index:j_id...`` span id.

        Returns ``None`` when more than one ``index:panelASV`` span is found in
        the document.
        """
        asv_panels = self.doc.xpath('//span[@id="index:panelASV"]')
        # NOTE(review): a single matching span does not trigger this branch;
        # confirm that "more than one" is the intended condition.
        if len(asv_panels) > 1:
            # Assurance Vie, we do not support this kind of account.
            return None

        span_id = Attr('//span[starts-with(@id, "index:j_id")]', 'id')(self.doc)
        return span_id.split(':')[1]
Exemple #5
0
 def obj_DPE(self):
     """Return the ``ENERGY_CLASS`` member named by the ``energy-rank-*`` CSS class.

     The ``class`` attribute of the energy-rank div is split into tokens; the
     first token starting with ``energy-rank-`` gives the attribute name
     looked up on ``ENERGY_CLASS`` (prefix removed, upper-cased).
     ``NotAvailable`` is returned when the attribute is empty, when no such
     token exists, or when ``ENERGY_CLASS`` has no attribute of that name.
     """
     classes = Attr(
         '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
         'class',
         default=""
     )(self)
     prefix = "energy-rank-"
     # ``next`` with a default avoids an IndexError when the element carries
     # no ``energy-rank-*`` class; the empty name then misses on getattr.
     rank = next(
         (token.replace(prefix, "").upper()
          for token in classes.split() if token.startswith(prefix)),
         ""
     )
     return getattr(ENERGY_CLASS, rank, NotAvailable)
Exemple #6
0
 def obj_DPE(self):
     """Map the ``energy-rank-*`` CSS class of the page to an ``ENERGY_CLASS`` member.

     The first class token starting with ``energy-rank-`` is stripped of that
     prefix, upper-cased and looked up as an attribute of ``ENERGY_CLASS``.
     ``NotAvailable`` is returned when the class attribute is empty, when no
     token has the prefix, or when the lookup finds nothing.
     """
     classes = Attr(
         '//div[has-class("energy-box")]//div[has-class("energy-rank")]',
         'class',
         default=""
     )(self)
     prefix = "energy-rank-"
     # Taking the first match through ``next`` (with a default) keeps a
     # missing ``energy-rank-*`` class from raising IndexError.
     rank = next(
         (token.replace(prefix, "").upper()
          for token in classes.split() if token.startswith(prefix)),
         ""
     )
     return getattr(ENERGY_CLASS, rank, NotAvailable)
Exemple #7
0
 def obj_details(self):
     """Return a dict whose ``"GES"`` entry is derived from the ``rank-*`` CSS class.

     The value is the first class token starting with ``rank-``, with that
     prefix removed and upper-cased. It is ``NotAvailable`` when the class
     attribute is missing or empty, or when no token has the prefix.
     """
     classes = Attr(
         '//div[has-class("energy-box")]//div[has-class("rank")]',
         'class',
         default=None
     )(self)
     GES = NotAvailable
     if classes:
         # ``next`` with a default replaces ``[...][0]``, which raised
         # IndexError when no class token started with "rank-".
         GES = next(
             (token.replace("rank-", "").upper()
              for token in classes.split() if token.startswith("rank-")),
             NotAvailable
         )
     return {
         "GES": GES
     }
Exemple #8
0
 def obj_level(self):
     """Return the disruption level constant matching the line's CSS classes.

     The ``class`` attribute of the div whose id is ``self.env['line']`` is
     split into tokens and checked against the known ``perturb_*`` classes in
     priority order. ``None`` is returned when none of them is present.
     """
     xpath = u'//*[@class="lignes"]//div[@id="%s"]' % self.env[u'line']
     css_classes = Attr(xpath, attr='class')(self).split()

     # Ordered from first-checked to last-checked; the first hit wins.
     levels = (
         (u"perturb_critique_trav", CRITICAL_AND_WORK),
         (u"perturb_critique", CRITICAL),
         (u"perturb_alerte_trav", ALERT_AND_WORK),
         (u"perturb_alerte", ALERT),
         (u"perturb_normal_trav", NORMAL_AND_WORK),
         (u"perturb_normal", NORMAL),
     )
     for css_class, level in levels:
         if css_class in css_classes:
             return level
     return None
Exemple #9
0
 def obj_level(self):
     """Translate the ``perturb_*`` CSS class of the current line into a level constant.

     Looks up the div whose id equals ``self.env['line']``, splits its
     ``class`` attribute and returns the constant of the first known class
     found, testing them in a fixed order. Returns ``None`` if no known class
     is present.
     """
     line_xpath = u'//*[@class="lignes"]//div[@id="%s"]' % self.env[u'line']
     tokens = Attr(line_xpath, attr='class')(self).split()

     # Same test order as an if/elif chain: earlier entries take precedence.
     ordered_levels = [
         (u"perturb_critique_trav", CRITICAL_AND_WORK),
         (u"perturb_critique", CRITICAL),
         (u"perturb_alerte_trav", ALERT_AND_WORK),
         (u"perturb_alerte", ALERT),
         (u"perturb_normal_trav", NORMAL_AND_WORK),
         (u"perturb_normal", NORMAL),
     ]
     for name, level in ordered_levels:
         if name in tokens:
             return level
     return None
Exemple #10
0
    def parse(self, el):
        """Fill ``self.env`` with the valuation data and fund code of one investment.

        ``unitvalue`` and ``vdate`` are read from the spans of the ``label``
        cell. When both a fund-detail link and a pdf link are present, the
        page form is submitted and the response (or the URL it points to) is
        used to set ``code`` and ``code_type``; both are ``NotAvailable`` when
        they cannot be determined.
        """
        # Trying to find vdate and unitvalue
        unitvalue, vdate = None, None
        for span in TableCell('label')(self)[0].xpath('.//span'):
            if unitvalue is None:
                unitvalue = Regexp(CleanText('.'), r'^([\d,]+)$',
                                   default=None)(span)
            if vdate is None:
                # Spans whose parent div mentions a maturity are skipped.
                parent_text = CleanText('./parent::div')(span)
                if not any(x in parent_text for x in [u"échéance", "Maturity"]):
                    vdate = Regexp(CleanText('.'), r'^([\d\/]+)$',
                                   default=None)(span)
        self.env['unitvalue'] = MyDecimal().filter(
            unitvalue) if unitvalue else NotAvailable
        self.env['vdate'] = Date(
            dayfirst=True).filter(vdate) if vdate else NotAvailable

        page = None
        link_id = Attr(u'.//a[contains(@title, "détail du fonds")]',
                       'id',
                       default=None)(self)
        inv_id = Attr('.//a[contains(@id, "linkpdf")]', 'id',
                      default=None)(self)

        if link_id and inv_id:
            form = self.page.get_form('//div[@id="operation"]//form')
            # The fund id is whatever follows the first '-' of the pdf link id.
            form['idFonds'] = inv_id.split('-', 1)[-1]
            form['org.richfaces.ajax.component'] = form[link_id] = link_id

            page = self.page.browser.open(form['javax.faces.encodedURL'],
                                          data=dict(form)).page

            if "hsbc.fr" in self.page.browser.BASEURL:  # special space for HSBC
                m = re.search(r'fundid=(\w+).+SH=(\w+)',
                              CleanText('//complete', default="")(page.doc))

                if m:  # had to put full url to skip redirections.
                    page = page.browser.open(
                        'https://www.assetmanagement.hsbc.com/feedRequest?feed_data=gfcFundData&cod=FR&client=FCPE&fId=%s&SH=%s&lId=fr'
                        % m.groups()).page
            elif "consulteroperations" not in self.page.browser.url:  # not on history
                url = Regexp(CleanText('//complete'),
                             r"openUrlFichesFonds\('(.*?)'\)",
                             default=NotAvailable)(page.doc)
                if url is NotAvailable:
                    # redirection to a useless graphplot page with url like /portal/salarie-sg/fichefonds?idFonds=XXX&source=/portal/salarie-sg/monepargne/mesavoirs
                    assert CleanText('//redirect/@url')(page.doc)
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                if url.startswith('http://docfinder.is.bnpparibas-ip.com/'):
                    # pdf... http://docfinder.is.bnpparibas-ip.com/api/files/040d05b3-1776-4991-aa49-f0cd8717dab8/1536
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                # Some URLs carry the ISIN directly; no extra request needed.
                match = re.match(
                    r'http://www.cpr-am.fr/fr/fonds_detail.php\?isin=([A-Z0-9]+)',
                    url)
                match = match or re.match(
                    r'http://www.cpr-am.fr/particuliers/product/view/([A-Z0-9]+)',
                    url)
                if match:
                    self.env['code'] = match.group(1)
                    self.env['code_type'] = Investment.CODE_TYPE_ISIN
                    return

                page = self.page.browser.open(url).page

        try:
            self.env['code'] = page.get_code()
            self.env['code_type'] = page.CODE_TYPE
        # Handle page is None and page has not get_code method
        except AttributeError:
            self.env['code'] = NotAvailable
            self.env['code_type'] = NotAvailable
Exemple #11
0
 def get_history_jid(self):
     """Return the second colon-separated part of the ``index:j_id...`` element id."""
     element_id = Attr('//*[starts-with(@id, "index:j_id")]', 'id')(self.doc)
     return element_id.split(':')[1]
Exemple #12
0
 def get_history_jid(self):
     """Return the part after the first ':' (and before the next) of the ``index:j_id...`` id."""
     id_parts = Attr('//*[starts-with(@id, "index:j_id")]', 'id')(self.doc).split(':')
     return id_parts[1]
Exemple #13
0
    def parse(self, el):
        """Fill ``self.env`` with the valuation data and fund code of one investment.

        ``unitvalue`` and ``vdate`` are read from the spans of the ``label``
        cell. When both a fund-detail link and a pdf link are present, the
        page form is submitted and the response (or the URL it points to) is
        used to set ``code`` and ``code_type``; both are ``NotAvailable`` when
        they cannot be determined.
        """
        # Trying to find vdate and unitvalue
        unitvalue, vdate = None, None
        for span in TableCell('label')(self)[0].xpath('.//span'):
            if unitvalue is None:
                unitvalue = Regexp(CleanText('.'), r'^([\d,]+)$', default=None)(span)
            if vdate is None:
                # Spans whose parent div mentions a maturity are skipped.
                parent_text = CleanText('./parent::div')(span)
                if not any(x in parent_text for x in [u"échéance", "Maturity"]):
                    vdate = Regexp(CleanText('.'), r'^([\d\/]+)$', default=None)(span)
        self.env['unitvalue'] = MyDecimal().filter(unitvalue) if unitvalue else NotAvailable
        self.env['vdate'] = Date(dayfirst=True).filter(vdate) if vdate else NotAvailable

        page = None
        link_id = Attr(u'.//a[contains(@title, "détail du fonds")]', 'id', default=None)(self)
        inv_id = Attr('.//a[contains(@id, "linkpdf")]', 'id', default=None)(self)

        if link_id and inv_id:
            form = self.page.get_form('//div[@id="operation"]//form')
            # The fund id is whatever follows the first '-' of the pdf link id.
            form['idFonds'] = inv_id.split('-', 1)[-1]
            form['org.richfaces.ajax.component'] = form[link_id] = link_id
            page = self.page.browser.open(form['javax.faces.encodedURL'], data=dict(form)).page

            if "hsbc.fr" in self.page.browser.BASEURL: # special space for HSBC
                m = re.search(r'fundid=(\w+).+SH=(\w+)', CleanText('//complete', default="")(page.doc))

                if m: # had to put full url to skip redirections.
                    page = page.browser.open('https://www.assetmanagement.hsbc.com/feedRequest?feed_data=gfcFundData&cod=FR&client=FCPE&fId=%s&SH=%s&lId=fr' % m.groups()).page
            elif "consulteroperations" not in self.page.browser.url: # not on history
                # The boolean argument is grouped explicitly: without the
                # group, ``|`` split the whole pattern in two and the second
                # alternative (``false\).*``) matched without capturing a URL.
                url = Regexp(CleanText('//complete'), r"openUrlFichesFonds\('(.*?)',(?:true|false)\).*", default=NotAvailable)(page.doc)

                if url is NotAvailable:
                    # redirection to a useless graphplot page with url like /portal/salarie-sg/fichefonds?idFonds=XXX&source=/portal/salarie-sg/monepargne/mesavoirs
                    # or on bnp, look for plot display function in a script
                    assert CleanText('//redirect/@url')(page.doc) or CleanText('//script[contains(text(), "afficherGraphique")]')(page.doc)
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                useless_urls = (
                    # pdf... http://docfinder.is.bnpparibas-ip.com/api/files/040d05b3-1776-4991-aa49-f0cd8717dab8/1536
                    'http://docfinder.is.bnpparibas-ip.com/',
                    # Redirection to a useless page with url like "https://epargne-salariale.axa-im.fr/fr/"
                    'https://epargne-salariale.axa-im.fr/fr/',
                )

                # str.startswith accepts a tuple of prefixes.
                if url.startswith(useless_urls):
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                # Some URLs carry the ISIN directly; no extra request needed.
                match = re.match(r'http://www.cpr-am.fr/fr/fonds_detail.php\?isin=([A-Z0-9]+)', url)
                match = match or re.match(r'http://www.cpr-am.fr/particuliers/product/view/([A-Z0-9]+)', url)
                if match:
                    self.env['code'] = match.group(1)
                    self.env['code_type'] = Investment.CODE_TYPE_ISIN
                    return

                if url.startswith('http://fr.swisslife-am.com/fr/'):
                    # These two cookies are set before opening swisslife URLs.
                    self.page.browser.session.cookies.set('location', 'fr')
                    self.page.browser.session.cookies.set('prof', 'undefined')

                page = self.page.browser.open(url).page

        try:
            self.env['code'] = page.get_code()
            self.env['code_type'] = page.CODE_TYPE
        # Handle page is None and page has not get_code method
        except AttributeError:
            self.env['code'] = NotAvailable
            self.env['code_type'] = NotAvailable
Exemple #14
0
    def parse(self, el):
        """Fill ``self.env`` with valuation, code and asset-category data of one investment.

        ``unitvalue`` and ``vdate`` are read from the spans of the ``label``
        cell; ``_link`` and ``asset_category`` start as ``None`` and
        ``NotAvailable``. When both a fund-detail link and a pdf link are
        present, the page form is submitted and the response (or the URL it
        yields) is used to set ``code``, ``code_type``, ``_link`` and
        ``asset_category``. ``code`` and ``code_type`` are ``NotAvailable``
        when they cannot be determined.
        """
        # Trying to find vdate and unitvalue
        unitvalue, vdate = None, None
        for span in TableCell('label')(self)[0].xpath('.//span'):
            if unitvalue is None:
                unitvalue = Regexp(CleanText('.'), r'^([\d,]+)$', default=None)(span)
            if vdate is None:
                # Spans whose parent div mentions a maturity are skipped.
                parent_text = CleanText('./parent::div')(span)
                if not any(x in parent_text for x in ["échéance", "Maturity"]):
                    vdate = Regexp(CleanText('.'), r'^([\d\/]+)$', default=None)(span)
        self.env['unitvalue'] = MyDecimal().filter(unitvalue) if unitvalue else NotAvailable
        self.env['vdate'] = Date(dayfirst=True).filter(vdate) if vdate else NotAvailable
        self.env['_link'] = None
        self.env['asset_category'] = NotAvailable

        page = None
        link_id = Attr(u'.//a[contains(@title, "détail du fonds")]', 'id', default=None)(self)
        inv_id = Attr('.//a[contains(@id, "linkpdf")]', 'id', default=None)(self)

        if link_id and inv_id:
            form = self.page.get_form('//div[@id="operation"]//form')
            # The fund id is whatever follows the first '-' of the pdf link id.
            form['idFonds'] = inv_id.split('-', 1)[-1]
            form['org.richfaces.ajax.component'] = form[link_id] = link_id
            page = self.page.browser.open(form['javax.faces.encodedURL'], data=dict(form)).page

            if 'hsbc.fr' in self.page.browser.BASEURL:
                # Special space for HSBC, does not contain any information related to performances.
                m = re.search(r'fundid=(\w+).+SH=(\w+)', CleanText('//complete', default='')(page.doc))
                if m:  # had to put full url to skip redirections.
                    page = page.browser.open('https://www.assetmanagement.hsbc.com/feedRequest?feed_data=gfcFundData&cod=FR&client=FCPE&fId=%s&SH=%s&lId=fr' % m.groups()).page

            elif not self.page.browser.history.is_here():
                url = page.get_invest_url()

                if empty(url):
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                # URLs used in browser.py to access investments performance history:
                if url.startswith('https://optimisermon.epargne-retraite-entreprises'):
                    # This URL can be used to access the BNP Wealth API to fetch investment performance and ISIN code
                    self.env['_link'] = url
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return
                elif url.startswith((
                    'http://sggestion-ede.com/product',
                    'https://www.lyxorfunds.com/part',
                    'https://www.societegeneralegestion.fr',
                    'http://www.etoile-gestion.com/productsheet',
                )):
                    # The link is kept, and code detection continues below.
                    self.env['_link'] = url

                # Try to fetch ISIN code from URL with re.match
                match = re.match(r'http://www.cpr-am.fr/fr/fonds_detail.php\?isin=([A-Z0-9]+)', url)
                match = match or re.match(r'http://www.cpr-am.fr/particuliers/product/view/([A-Z0-9]+)', url)
                if match:
                    self.env['code'] = match.group(1)
                    if is_isin_valid(match.group(1)):
                        self.env['code_type'] = Investment.CODE_TYPE_ISIN
                    else:
                        self.env['code_type'] = Investment.CODE_TYPE_AMF
                    return

                # Try to fetch ISIN code from URL with re.search
                m = re.search(r'&ISIN=([^&]+)', url)
                m = m or re.search(r'&isin=([^&]+)', url)
                m = m or re.search(r'&codeIsin=([^&]+)', url)
                m = m or re.search(r'lyxorfunds\.com/part/([^/]+)', url)
                if m:
                    self.env['code'] = m.group(1)
                    if is_isin_valid(m.group(1)):
                        self.env['code_type'] = Investment.CODE_TYPE_ISIN
                    else:
                        self.env['code_type'] = Investment.CODE_TYPE_AMF
                    return

                useless_urls = (
                    # pdf... http://docfinder.is.bnpparibas-ip.com/api/files/040d05b3-1776-4991-aa49-f0cd8717dab8/1536
                    'http://docfinder.is.bnpparibas-ip.com/',
                    # The AXA website displays performance graphs but everything is calculated using JS scripts.
                    # There is an API but it only contains risk data and performances per year, not 1-3-5 years.
                    'https://epargne-salariale.axa-im.fr/fr/',
                    # Redirection to the Rothschild Gestion website, which doesn't exist anymore...
                    'https://www.rothschildgestion.com',
                    # URL to the Morningstar website does not contain any useful information
                    'http://doc.morningstar.com',
                )
                # str.startswith accepts a tuple of prefixes.
                if url.startswith(useless_urls):
                    self.env['code'] = NotAvailable
                    self.env['code_type'] = NotAvailable
                    return

                if url.startswith('http://fr.swisslife-am.com/fr/'):
                    # These two cookies are set before opening swisslife URLs.
                    self.page.browser.session.cookies.set('location', 'fr')
                    self.page.browser.session.cookies.set('prof', 'undefined')
                try:
                    page = self.page.browser.open(url).page
                except HTTPNotFound:
                    # Some pages lead to a 404 so we must avoid unnecessary crash
                    # (``page`` then still refers to the form response).
                    self.logger.warning('URL %s was not found, investment details will be skipped.', url)

        if isinstance(page, CodePage):
            self.env['code'] = page.get_code()
            self.env['code_type'] = page.CODE_TYPE
            self.env['asset_category'] = page.get_asset_category()
        else:
            # The page is not handled and does not have a get_code method.
            self.env['code'] = NotAvailable
            self.env['code_type'] = NotAvailable
            self.env['asset_category'] = NotAvailable