def login(self, username, password):
    """Log into Facebook through the OAuth dialog and store the access token.

    Submits the credentials to the login form, follows the redirect,
    confirms the authorization form and extracts ``access_token`` from the
    final response body. On success ``self.access_token`` and ``self.info``
    (the result of requesting ``/me``) are set.

    :raises BrowserIncorrectPassword: when the login form does not redirect,
        or when the following page shows an error block.
    :raises ParseError: when no access token is found in the response.
    """
    oauth_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=fbconnect://success&scope=email,user_birthday,user_friends,public_profile,user_photos,user_likes&response_type=token' % self.CLIENT_ID
    self.location(oauth_url)

    login_page = HTMLPage(self, self.response)
    login_form = login_page.get_form('//form[@id="login_form"]')
    login_form['email'] = username
    login_form['pass'] = password
    login_form.submit(allow_redirects=False)

    # A successful login answers with a redirect; anything else is a failure.
    headers = self.response.headers
    if 'Location' not in headers:
        raise BrowserIncorrectPassword()
    self.location(headers['Location'])

    confirm_page = HTMLPage(self, self.response)
    error_xpath = '//td/div[has-class("s")]'
    if confirm_page.doc.xpath(error_xpath):
        raise BrowserIncorrectPassword(CleanText(error_xpath)(confirm_page.doc))

    # Validate the authorization dialog.
    confirm_page.get_form(nr=0, submit='//input[@value="OK"]').submit()

    token_match = re.search('access_token=([^&]+)&', self.response.text)
    if not token_match:
        raise ParseError('Unable to find access_token')
    self.access_token = token_match.group(1)
    self.info = self.request('/me')
def login(self, username, password):
    """Authenticate against the Facebook v2.6 OAuth dialog.

    Fills the first form of the dialog with the credentials, follows the
    redirect, confirms the authorization form and reads ``access_token``
    from the resulting response body. Sets ``self.access_token`` and
    ``self.info`` (result of requesting ``/me``).

    :raises BrowserIncorrectPassword: when no redirect is returned after
        submitting the credentials, or when an error block is displayed.
    :raises ParseError: when the access token cannot be found.
    """
    url_head = 'https://www.facebook.com/v2.6/dialog/oauth?redirect_uri=fb464891386855067%3A%2F%2Fauthorize%2F&display=touch&state=%7B%22challenge%22%3A%22IUUkEUqIGud332lfu%252BMJhxL4Wlc%253D%22%2C%220_auth_logger_id%22%3A%2230F06532-A1B9-4B10-BB28-B29956C71AB1%22%2C%22com.facebook.sdk_client_state%22%3Atrue%2C%223_method%22%3A%22sfvc_auth%22%7D&scope=user_birthday%2Cuser_photos%2Cuser_education_history%2Cemail%2Cuser_relationship_details%2Cuser_friends%2Cuser_work_history%2Cuser_likes&response_type=token%2Csigned_request&default_audience=friends&return_scopes=true&auth_type=rerequest&client_id='
    url_tail = '&ret=login&sdk=ios&logger_id=30F06532-A1B9-4B10-BB28-B29956C71AB1&ext=1470840777&hash=AeZqkIcf-NEW6vBd'
    self.location(url_head + self.CLIENT_ID + url_tail)

    credentials_form = HTMLPage(self, self.response).get_form()
    credentials_form['email'] = username
    credentials_form['pass'] = password
    credentials_form.submit(allow_redirects=False)

    # Without a redirect the credentials were not accepted.
    response_headers = self.response.headers
    if 'Location' not in response_headers:
        raise BrowserIncorrectPassword()
    self.location(response_headers['Location'])

    next_page = HTMLPage(self, self.response)
    error_selector = '//td/div[has-class("s")]'
    if next_page.doc.xpath(error_selector):
        raise BrowserIncorrectPassword(CleanText(error_selector)(next_page.doc))

    confirm_form = next_page.get_form(nr=0, submit='//input[@name="__CONFIRM__"]')
    confirm_form.submit()

    found = re.search('access_token=([^&]+)&', self.response.text)
    if found is None:
        raise ParseError('Unable to find access_token')
    self.access_token = found.group(1)
    self.info = self.request('/me')
def login(self, username, password):
    """Log into Facebook through the OAuth popup used for www.okcupid.com.

    Submits the credentials to the login form (after setting a few session
    headers/cookies), follows the redirect and extracts ``access_token``
    from the text of the first ``<script>`` element of the resulting page.
    On success ``self.access_token`` is set.

    :raises BrowserIncorrectPassword: when the login form does not redirect,
        or when the following page shows an error block.
    :raises ParseError: when the page has no usable script or the script
        does not contain an access token.
    """
    self.location('https://www.facebook.com/v2.9/dialog/oauth?app_id=484681304938818&auth_type=rerequest&channel_url=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D44%23cb%3Df33dd8340f36618%26domain%3Dwww.okcupid.com%26origin%3Dhttps%253A%252F%252Fwww.okcupid.com%252Ff5818a5f355be8%26relation%3Dopener&client_id=484681304938818&display=popup&domain=www.okcupid.com&e2e=%7B%7D&fallback_redirect_uri=https%3A%2F%2Fwww.okcupid.com%2Flogin&locale=en_US&origin=1&redirect_uri=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D44%23cb%3Df2ce4ca90b82cb4%26domain%3Dwww.okcupid.com%26origin%3Dhttps%253A%252F%252Fwww.okcupid.com%252Ff5818a5f355be8%26relation%3Dopener%26frame%3Df3f40f304ac5e9&response_type=token%2Csigned_request&scope=email%2Cuser_birthday%2Cuser_photos&sdk=joey&version=v2.9')
    page = HTMLPage(self, self.response)

    form = page.get_form('//form[@id="login_form"]')
    form['email'] = username
    form['pass'] = password

    # Extra header/cookies sent along with the login form.
    # NOTE(review): the values are hard-coded; presumably captured from a
    # real browser session -- confirm they are still accepted.
    self.session.headers['cookie-installing-permission'] = 'required'
    self.session.cookies['wd'] = '640x1033'
    self.session.cookies['act'] = '1563018648141%2F0'

    form.submit(allow_redirects=False)
    if 'Location' not in self.response.headers:
        raise BrowserIncorrectPassword()

    self.location(self.response.headers['Location'])
    page = HTMLPage(self, self.response)

    error_xpath = '//td/div[has-class("s")]'
    if page.doc.xpath(error_xpath):
        raise BrowserIncorrectPassword(CleanText(error_xpath)(page.doc))

    # Only the first script is inspected. A missing script, or one without
    # text, is reported as a ParseError instead of leaking an IndexError or
    # TypeError from the lookup below.
    scripts = page.doc.xpath('//script')
    script_text = (scripts[0].text if scripts else None) or ''

    m = re.search('access_token=([^&]+)&', script_text)
    if not m:
        raise ParseError('Unable to find access_token')
    self.access_token = m.group(1)
def on_load(self):
    """Run the base page hook, then detect the website's error banner.

    :raises BrowserUnavailable: when the page contains the
        "Une erreur est survenue" error message.
    """
    HTMLPage.on_load(self)

    # The website sometimes crashes and only displays this error block.
    crash_xpath = u'//div[@id="divError"]/span[contains(text(),"Une erreur est survenue")]'
    crash_nodes = self.doc.xpath(crash_xpath)
    if crash_nodes:
        raise BrowserUnavailable()
def build_doc(self, content):
    """Parse the JSON response and, if present, the HTML stored under 'data'.

    :param content: raw response content, handed to ``JsonPage.build_doc``.
    :return: the HTML document built from the ``data`` value when the JSON
        has that key, otherwise the parsed JSON itself.
    """
    parsed = JsonPage.build_doc(self, content)
    if 'data' not in parsed:
        return parsed

    # The 'data' value holds HTML. It is encoded first because
    # HTMLPage.build_doc() uses BytesIO, which expects bytes.
    html_bytes = parsed['data'].encode(self.encoding)
    return HTMLPage(self.browser, self.response).build_doc(html_bytes)
def __init__(self, *args, **kwargs):
    """Build the HTML page, then expose the JSON embedded in its scripts.

    After construction:

    * ``self.htmldoc`` holds the original HTML document;
    * ``self.api_content`` holds the JSON assigned to ``window.APP_CONFIG``
      (second ``<script>`` of the body);
    * ``self.doc`` holds the JSON assigned to ``window.FLUX_STATE``
      (third ``<script>`` of the body).
    """
    HTMLPage.__init__(self, *args, **kwargs)

    html = self.doc
    flux_state_text = CleanText(
        '(//body/script)[3]',
        replace=[('window.FLUX_STATE = ', '')],
    )(html)
    app_config_text = CleanText(
        '(//body/script)[2]',
        replace=[('window.APP_CONFIG = ', '')],
    )(html)

    # Keep the HTML tree reachable before self.doc is swapped for JSON.
    self.htmldoc = html
    self.api_content = json.loads(app_config_text)
    self.doc = json.loads(flux_state_text)
def __init__(self, *args, **kwargs):
    """Build the HTML page, then replace ``self.doc`` with its embedded JSON.

    The JSON is the string passed to ``JSON.parse`` for
    ``window["initialData"]`` in the page scripts. It is unescaped before
    being parsed.
    """
    HTMLPage.__init__(self, *args, **kwargs)

    extract_initial_data = Regexp(
        CleanText('//script'),
        r"window\[\"initialData\"\] = JSON.parse\(\"({.*})\"\);window\[\"tags\"\]"
    )
    escaped = extract_initial_data(self.doc)

    # Resolve the backslash escapes of the JavaScript string literal.
    unescaped, _consumed = codecs.unicode_escape_decode(escaped)
    # Round-trip through UTF-8 bytes, letting surrogates through on encode.
    utf8_bytes = unescaped.encode('utf-8', 'surrogatepass')
    self.doc = json.loads(utf8_bytes.decode('utf-8'))
def build_doc(self, content):
    """Return the accounts JSON embedded in the HTML response.

    The parsed HTML document is also stored in ``self.html_doc`` so that
    the number of spaces can be counted later.

    :param content: page content searched for the
        ``syntheseController.init(...)`` call.
    :return: the first JSON value decoded from the argument of that call.
    """
    # Keep the HTML tree around: it is needed to count the spaces.
    self.html_doc = HTMLPage(self.browser, self.response).doc

    # The accounts list is a JSON value inside an HTML attribute.
    init_call = re.search(r"syntheseController\.init\((.*)\)'>", content)
    payload = init_call.group(1)

    # raw_decode() stops after the first JSON value and ignores what follows.
    # To debug the JSON accounts, dump `accounts` with json.dumps().
    accounts, _end = json.JSONDecoder().raw_decode(payload)
    return accounts
def __init__(self, *args, **kwargs):
    """Build the HTML page, then expose the JSON embedded in its scripts.

    After construction:

    * ``self.htmldoc`` holds the original HTML document;
    * ``self.api_content`` holds the JSON assigned to ``window.APP_CONFIG``
      (third ``<script>`` of the body);
    * ``self.doc`` holds the JSON assigned to ``window.FLUX_STATE``
      (fourth ``<script>`` of the body).
    """
    HTMLPage.__init__(self, *args, **kwargs)

    html_tree = self.doc
    strip_flux_prefix = [('window.FLUX_STATE = ', '')]
    strip_config_prefix = [('window.APP_CONFIG = ', '')]
    flux_json = CleanText('(//body/script)[4]', replace=strip_flux_prefix)(html_tree)
    config_json = CleanText('(//body/script)[3]', replace=strip_config_prefix)(html_tree)

    # Preserve the HTML tree before self.doc is replaced by the JSON state.
    self.htmldoc = html_tree
    self.api_content = json.loads(config_json)
    self.doc = json.loads(flux_json)
def on_load(self):
    """Run the base page hook, then look for blocking messages.

    :raises ActionNeeded: when one of the known user-action messages is
        displayed; the exception carries the extracted text.
    :raises BrowserUnavailable: when the website's error banner is shown.
    """
    HTMLPage.on_load(self)

    # (text extractor, fragment that identifies a required user action)
    action_checks = (
        (
            CleanText('//div[has-class("form-input-label")]', default=''),
            "prendre connaissance des nouvelles conditions",
        ),
        (
            CleanText('//span[@id="txtErrorAccesBase"]'),
            'Merci de nous envoyer',
        ),
    )
    for extract, marker in action_checks:
        text = extract(self.doc)
        if marker in text:
            raise ActionNeeded(text)

    # The website sometimes crashes and only displays this error block.
    if self.doc.xpath(u'//div[@id="divError"]/span[contains(text(),"Une erreur est survenue")]'):
        raise BrowserUnavailable()
def login(self, username, password):
    """Log into Facebook through the OAuth dialog and read the token.

    Submits the credentials (with ``persistent`` set) to the login form and
    follows the redirect. ``access_token`` is read from the final URL, and
    ``self.info`` is set to the result of requesting ``/me``.

    :raises BrowserIncorrectPassword: when the login form does not redirect.
    """
    oauth_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=https://www.facebook.com/connect/login_success.html&scope=basic_info,email,public_profile,user_about_me,user_activities,user_birthday,user_education_history,user_friends,user_interests,user_likes,user_location,user_photos,user_relationship_details&response_type=token' % self.CLIENT_ID
    self.location(oauth_url)

    login_form = HTMLPage(self, self.response).get_form('//form[@id="login_form"]')
    login_form['email'] = username
    login_form['pass'] = password
    login_form['persistent'] = 1
    login_form.submit(allow_redirects=False)

    headers = self.response.headers
    if 'Location' not in headers:
        raise BrowserIncorrectPassword()
    self.location(headers['Location'])

    # NOTE(review): when the URL carries no token, self.access_token is
    # left untouched and '/me' is still requested -- confirm this is wanted.
    token_match = re.search('access_token=([^&]+)&', self.url)
    if token_match:
        self.access_token = token_match.group(1)
    self.info = self.request('/me')
def on_load(self):
    """Run the base page hook, then look for blocking messages.

    :raises ActionNeeded: when the form label or the access-error span
        contains one of the known user-action messages.
    :raises BrowserUnavailable: when the website's error banner is shown.
    """
    HTMLPage.on_load(self)

    label_text = CleanText('//div[has-class("form-input-label")]', default='')(self.doc)
    if "prendre connaissance des nouvelles conditions" in label_text:
        raise ActionNeeded(label_text)

    access_error_text = CleanText('//span[@id="txtErrorAccesBase"]')(self.doc)
    if 'Merci de nous envoyer' in access_error_text:
        raise ActionNeeded(access_error_text)

    # The website sometimes crashes and only displays this error block.
    crash_banner = self.doc.xpath(
        u'//div[@id="divError"]/span[contains(text(),"Une erreur est survenue")]'
    )
    if crash_banner:
        raise BrowserUnavailable()
def login(self, username, password):
    """Authenticate on the Facebook OAuth dialog and read the token.

    Posts the credentials (with ``persistent`` set), follows the redirect
    and reads ``access_token`` from the URL reached. ``self.info`` is then
    set to the result of requesting ``/me``.

    :raises BrowserIncorrectPassword: when no redirect follows the login.
    """
    dialog_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=https://www.facebook.com/connect/login_success.html&scope=email,user_birthday,user_friends,public_profile,user_photos,user_likes&response_type=token' % self.CLIENT_ID
    self.location(dialog_url)

    credentials = HTMLPage(self, self.response).get_form('//form[@id="login_form"]')
    for field, value in (('email', username), ('pass', password), ('persistent', 1)):
        credentials[field] = value
    credentials.submit(allow_redirects=False)

    reply_headers = self.response.headers
    if 'Location' not in reply_headers:
        raise BrowserIncorrectPassword()
    self.location(reply_headers['Location'])

    # NOTE(review): a URL without token leaves self.access_token unchanged
    # and '/me' is requested anyway -- confirm this is intended.
    found = re.search('access_token=([^&]+)&', self.url)
    if found:
        self.access_token = found.group(1)
    self.info = self.request('/me')
def build_doc(self, content):
    """Return the accounts JSON embedded in the HTML response.

    The parsed HTML document is also stored in ``self.html_doc`` so that
    the number of spaces can be counted later.

    :param content: page content searched for the
        ``syntheseController.init(...)`` call.
    :return: the first JSON value decoded from the argument of that call.
    """
    # Store the HTML doc to count the number of spaces
    self.html_doc = HTMLPage(self.browser, self.response).doc

    # Transform the HTML tag containing the accounts list into a JSON.
    # The pattern is a raw string: "\." and "\(" are not valid escapes in a
    # normal string literal and trigger invalid-escape warnings.
    # NOTE(review): a page without this call makes re.search() return None,
    # so the .group(1) below fails with AttributeError.
    raw = re.search(r"syntheseController\.init\((.*)\)'>", content).group(1)

    # raw_decode() parses the first JSON value and ignores trailing text.
    # To debug the JSON accounts, dump the decoded value with json.dumps().
    d = json.JSONDecoder()
    return d.raw_decode(raw)[0]
def login(self, username, password):
    """Sign in on the Facebook OAuth dialog and capture the access token.

    Posts the credentials (with ``persistent`` set), follows the redirect
    and reads ``access_token`` from the URL reached. ``self.info`` is then
    set to the result of requesting ``/me``.

    :raises BrowserIncorrectPassword: when no redirect follows the login.
    """
    target = "https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=https://www.facebook.com/connect/login_success.html&scope=email,user_birthday,user_friends,public_profile,user_photos,user_likes&response_type=token" % self.CLIENT_ID
    self.location(target)

    html_page = HTMLPage(self, self.response)
    auth_form = html_page.get_form('//form[@id="login_form"]')
    auth_form["email"] = username
    auth_form["pass"] = password
    auth_form["persistent"] = 1
    auth_form.submit(allow_redirects=False)

    # A missing redirect means the credentials were not accepted.
    if "Location" not in self.response.headers:
        raise BrowserIncorrectPassword()
    redirect_url = self.response.headers["Location"]
    self.location(redirect_url)

    # NOTE(review): when no token is present the attribute is not set and
    # "/me" is still requested -- confirm this is intended.
    token = re.search("access_token=([^&]+)&", self.url)
    if token:
        self.access_token = token.group(1)
    self.info = self.request("/me")
def build_doc(self, content):
    """Build an HTML document from the HTML fragments embedded in the XML.

    The response is XML whose first two ``update`` nodes hold HTML text
    (transactions first, investments second). Both texts are encoded with
    the page encoding, concatenated and parsed as HTML.

    :param content: raw XML response content.
    :return: the document produced by ``HTMLPage.build_doc``.
    """
    # HTML embedded in XML: parse the XML first, then extract the HTML.
    xml_doc = XMLPage.build_doc(self, content)

    fragment_paths = (
        '//partial-response/changes/update[1]',  # transactions
        '//partial-response/changes/update[2]',  # investments
    )
    fragments = [
        xml_doc.xpath(path)[0].text.encode(encoding=self.encoding)
        for path in fragment_paths
    ]
    return HTMLPage.build_doc(self, b''.join(fragments))
def login(self, username, password):
    """Log into Facebook through the OAuth dialog and store the access token.

    Submits the credentials to the login form, follows the redirect,
    confirms the authorization form (``__CONFIRM__`` button) and extracts
    ``access_token`` from the final response body. On success
    ``self.access_token`` and ``self.info`` (result of requesting ``/me``)
    are set.

    :raises BrowserIncorrectPassword: when the login form does not redirect,
        or when the following page shows an error block.
    :raises ParseError: when no access token is found in the response.
    """
    start_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=fbconnect://success&scope=email,user_birthday,user_friends,public_profile,user_photos,user_likes&response_type=token' % self.CLIENT_ID
    self.location(start_url)

    signin_form = HTMLPage(self, self.response).get_form('//form[@id="login_form"]')
    signin_form['email'] = username
    signin_form['pass'] = password
    signin_form.submit(allow_redirects=False)

    reply_headers = self.response.headers
    if 'Location' not in reply_headers:
        raise BrowserIncorrectPassword()
    self.location(reply_headers['Location'])

    authorize_page = HTMLPage(self, self.response)
    failure_xpath = '//td/div[has-class("s")]'
    if authorize_page.doc.xpath(failure_xpath):
        failure_text = CleanText(failure_xpath)(authorize_page.doc)
        raise BrowserIncorrectPassword(failure_text)

    # Accept the authorization dialog.
    authorize_form = authorize_page.get_form(nr=0, submit='//input[@name="__CONFIRM__"]')
    authorize_form.submit()

    token = re.search('access_token=([^&]+)&', self.response.text)
    if not token:
        raise ParseError('Unable to find access_token')
    self.access_token = token.group(1)
    self.info = self.request('/me')
def login(self, username, password):
    """Sign in through the Facebook v2.6 OAuth dialog and keep the token.

    Fills the first form of the dialog with the credentials, follows the
    redirect, confirms the authorization form and reads ``access_token``
    from the resulting response body. Sets ``self.access_token`` and
    ``self.info`` (result of requesting ``/me``).

    :raises BrowserIncorrectPassword: when no redirect is returned after
        submitting the credentials, or when an error block is displayed.
    :raises ParseError: when the access token cannot be found.
    """
    before_client_id = 'https://www.facebook.com/v2.6/dialog/oauth?redirect_uri=fb464891386855067%3A%2F%2Fauthorize%2F&display=touch&state=%7B%22challenge%22%3A%22IUUkEUqIGud332lfu%252BMJhxL4Wlc%253D%22%2C%220_auth_logger_id%22%3A%2230F06532-A1B9-4B10-BB28-B29956C71AB1%22%2C%22com.facebook.sdk_client_state%22%3Atrue%2C%223_method%22%3A%22sfvc_auth%22%7D&scope=user_birthday%2Cuser_photos%2Cuser_education_history%2Cemail%2Cuser_relationship_details%2Cuser_friends%2Cuser_work_history%2Cuser_likes&response_type=token%2Csigned_request&default_audience=friends&return_scopes=true&auth_type=rerequest&client_id='
    after_client_id = '&ret=login&sdk=ios&logger_id=30F06532-A1B9-4B10-BB28-B29956C71AB1&ext=1470840777&hash=AeZqkIcf-NEW6vBd'
    self.location(before_client_id + self.CLIENT_ID + after_client_id)

    first_page = HTMLPage(self, self.response)
    signin = first_page.get_form()
    signin['email'] = username
    signin['pass'] = password
    signin.submit(allow_redirects=False)

    # The dialog redirects only when the credentials are accepted.
    if 'Location' not in self.response.headers:
        raise BrowserIncorrectPassword()
    destination = self.response.headers['Location']
    self.location(destination)

    second_page = HTMLPage(self, self.response)
    error_blocks = second_page.doc.xpath('//td/div[has-class("s")]')
    if error_blocks:
        raise BrowserIncorrectPassword(
            CleanText('//td/div[has-class("s")]')(second_page.doc))

    second_page.get_form(nr=0, submit='//input[@name="__CONFIRM__"]').submit()

    token_match = re.search('access_token=([^&]+)&', self.response.text)
    if token_match is None:
        raise ParseError('Unable to find access_token')
    self.access_token = token_match.group(1)
    self.info = self.request('/me')
def login(self, username, password):
    """Log into Facebook through the OAuth dialog and read the token.

    Before submitting the credentials, any ``_js_datr`` value found in the
    page scripts is copied into the session cookies. After the redirect,
    ``access_token`` is read from the final URL and ``self.info`` is set to
    the result of requesting ``/me``.

    :raises BrowserIncorrectPassword: when the login form does not redirect.
    """
    oauth_url = 'https://www.facebook.com/dialog/oauth?client_id=%s&redirect_uri=https://www.facebook.com/connect/login_success.html&scope=basic_info,email,public_profile,user_about_me,user_activities,user_birthday,user_education_history,user_friends,user_interests,user_likes,user_location,user_photos,user_relationship_details&response_type=token' % self.CLIENT_ID
    self.location(oauth_url)

    login_page = HTMLPage(self, self.response)
    login_form = login_page.get_form('//form[@id="login_form"]')
    login_form['email'] = username
    login_form['pass'] = password
    login_form['persistent'] = 1

    # Copy the "_js_datr" value(s) defined in inline scripts to the cookies.
    for script_node in login_page.doc.xpath('//script'):
        script_source = script_node.text or ''
        datr = re.search('"_js_datr","([^"]+)"', script_source)
        if datr:
            self.session.cookies.set('_js_datr', datr.group(1))

    login_form.submit(allow_redirects=False)

    headers = self.response.headers
    if 'Location' not in headers:
        raise BrowserIncorrectPassword()
    self.location(headers['Location'])

    # NOTE(review): without a token in the URL, self.access_token is left
    # untouched and '/me' is still requested -- confirm this is wanted.
    token = re.search('access_token=([^&]+)&', self.url)
    if token:
        self.access_token = token.group(1)
    self.info = self.request('/me')
def get_profile(self, id):
    """Fetch popularity counters and coordinates of an adopteunmec profile.

    :param id: profile identifier, interpolated into the profile URL.
    :return: a dict with a ``'popu'`` mapping (lower-cased row header ->
        integer value) and, when found in the page scripts, ``'lat'`` and
        ``'lng'`` floats. An empty dict is returned when
        ``datetime.now().hour`` is 18 or later, or is 0, and when the
        profile page cannot be opened.
    """
    profile = {}

    # Read the clock once so both bounds are checked against the same hour.
    # NOTE(review): presumably the website restricts access during these
    # hours -- confirm.
    hour = datetime.now().hour
    if hour >= 18 or hour < 1:
        return profile

    profile_url = 'https://www.adopteunmec.com/profile/%s' % id

    r = None
    try:
        r = self.open(profile_url)
    except BrowserUnavailable:
        pass

    # Not landing on a profile URL is treated as "logged out": log in again
    # and retry once.
    if r is None or not re.match(r'https://www.adopteunmec.com/profile/\d+', r.url):
        self.login()
        try:
            r = self.open(profile_url)
        except BrowserUnavailable:
            r = None

    if r is None:
        return {}

    page = HTMLPage(self, r)
    doc = page.doc

    profile['popu'] = {}
    for tr in doc.xpath('//div[@id="popularity"]//tr'):
        cols = tr.findall('td')
        # Skip rows without any cell (previously an IndexError) or whose
        # first cell is empty.
        if not cols or not cols[0].text:
            continue

        digits = re.sub(u'[^0-9]+', u'', cols[0].text).strip()
        if not digits:
            # No number in the cell: int('') would raise ValueError.
            continue

        key = CleanText('./th')(tr).strip().lower()
        profile['popu'][key] = int(digits)

    for script in doc.xpath('//script'):
        text = script.text
        if text is None:
            continue

        m = re.search(r"'memberLat'\s*:\s*([\-\d\.]+),", text, re.IGNORECASE)
        if m:
            profile['lat'] = float(m.group(1))

        m = re.search(r"'memberLng'\s*:\s*([\-\d\.]+),", text, re.IGNORECASE)
        if m:
            profile['lng'] = float(m.group(1))

    return profile
def urlinfo(self, url, maxback=2):
    """Return the content type, human-readable size and title of a URL.

    A ``HEAD`` request is tried first. On a "not found" answer, trailing
    non-alphanumeric characters are removed from the URL one at a time (at
    most ``maxback`` times) and the lookup is retried. When the server
    rejects ``HEAD`` with 501 or 405, a normal request is used instead.

    :param url: URL to inspect; ``mobile.twitter.com`` is rewritten to
        ``twitter.com``.
    :param maxback: number of trailing characters that may still be
        stripped after a "not found" answer.
    :return: ``(content_type, hsize, title)``; ``hsize`` is the result of
        ``self.human_size`` or None when the size is unknown, ``title`` is
        None for non-HTML resources or when no title is found.
    :raises HTTPNotFound: when the URL is not found and cannot be shortened.
    :raises BrowserHTTPError: for other HTTP errors on the ``HEAD`` request.
    """
    if urlparse.urlsplit(url).netloc == 'mobile.twitter.com':
        url = url.replace('mobile.twitter.com', 'twitter.com', 1)

    try:
        r = self.open(url, method='HEAD')
        body = False
    except HTTPNotFound:
        # The URL may have captured trailing punctuation from the
        # surrounding text: drop it and retry.
        if maxback and not url[-1].isalnum():
            return self.urlinfo(url[:-1], maxback - 1)
        raise
    except BrowserHTTPError as e:
        if e.response.status_code not in (501, 405):
            raise
        # HEAD is not supported by this server: fetch the body directly.
        r = self.open(url)
        body = True

    content_type = r.headers.get('Content-Type')

    try:
        size = int(r.headers.get('Content-Length'))
        hsize = self.human_size(size)
    except (TypeError, ValueError):
        # Header missing (TypeError) or not a number (ValueError).
        size = None
        hsize = None

    # Without a Content-Type, fall back on the URL extension. re.search is
    # required here: re.match only matches at the start of the URL, so the
    # extension test could never succeed.
    if content_type:
        is_html = 'html' in content_type
    else:
        is_html = re.search(r'\.x?html?$', url)

    title = None
    if is_html:
        if not body:
            r = self.open(url)

        # Update the size as we might not have it from the headers.
        size = len(r.content)
        hsize = self.human_size(size)

        page = HTMLPage(self, r)

        # The last matching element wins, as each one overwrites the title.
        for node in page.doc.xpath('//head/title'):
            title = to_unicode(node.text_content()).strip()
            title = ' '.join(title.split())

        if urlparse.urlsplit(url).netloc.endswith('twitter.com'):
            # For tweets, use the tweet text rather than the page title.
            for node in page.doc.getroot().cssselect(
                    '.permalink-tweet .tweet-text'):
                title = to_unicode(node.text_content()).strip()
                title = ' '.join(title.splitlines())

    return content_type, hsize, title
def on_load(self):
    """Call the parent hook and fail fast when the site shows its error page.

    :raises BrowserUnavailable: when the "Une erreur est survenue" message
        is found in the error block.
    """
    HTMLPage.on_load(self)

    # The website sometimes crashes and answers with this error block only.
    site_crashed = bool(self.doc.xpath(
        u'//div[@id="divError"]/span[contains(text(),"Une erreur est survenue")]'
    ))
    if site_crashed:
        raise BrowserUnavailable()
def build_doc(self, text):
    """Build an HTML document from the 'top' and 'tab' parts of the response.

    The parent class parses ``text`` first; the ``top`` and ``tab`` values
    of the result are concatenated, encoded with the page encoding and
    parsed as HTML.

    :param text: raw response content.
    :return: the document built by ``HTMLPage.build_doc``.
    """
    parsed = super(TrackPage, self).build_doc(text)
    markup = ''.join((parsed['top'], parsed['tab']))
    encoded_markup = markup.encode(self.encoding)
    return HTMLPage(self.browser, self.response).build_doc(encoded_markup)
class AccountsPage(LoggedPage, JsonPage):
    """Accounts overview page: JSON data embedded in an HTML response."""

    def build_doc(self, content):
        """Return the accounts JSON embedded in the HTML response.

        The parsed HTML document is also stored in ``self.html_doc`` so
        that the number of spaces can be counted later.
        """
        # Keep the HTML tree around: it is needed to count the spaces.
        self.html_doc = HTMLPage(self.browser, self.response).doc

        # The accounts list is a JSON value inside an HTML attribute.
        init_call = re.search(r"syntheseController\.init\((.*)\)'>", content)
        payload = init_call.group(1)

        # raw_decode() parses the first JSON value and ignores what follows.
        # To debug the JSON accounts, dump `accounts` with json.dumps().
        accounts, _end = json.JSONDecoder().raw_decode(payload)
        return accounts

    def count_spaces(self):
        """Return the total number of spaces.

        It is the number of available space choices plus the one we are on
        now. Some professional connections have a very specific xpath, so
        only nodes having both 'idBamIndex' and "HubAccounts-link--cael"
        are counted, otherwise there might be space duplicates.
        """
        other_spaces = self.html_doc.xpath(
            '//a[contains(@class, "HubAccounts-link--cael") and contains(@href, "idBamIndex=")]'
        )
        return len(other_spaces) + 1

    def get_space_type(self):
        """Return the raw 'marche' value of the document."""
        return Dict('marche')(self.doc)

    def get_owner_type(self):
        """Map the 'marche' value to an owner type (NotAvailable if unknown)."""
        owner_by_market = {
            'PARTICULIER': AccountOwnerType.PRIVATE,
            'HORS_MARCHE': AccountOwnerType.PRIVATE,
            'PROFESSIONNEL': AccountOwnerType.ORGANIZATION,
            'AGRICULTEUR': AccountOwnerType.ORGANIZATION,
            'PROMOTEURS': AccountOwnerType.ORGANIZATION,
            'ENTREPRISE': AccountOwnerType.ORGANIZATION,
            'PROFESSION_LIBERALE': AccountOwnerType.ORGANIZATION,
            'ASSOC_CA_MODERE': AccountOwnerType.ASSOCIATION,
        }
        market = Dict('marche')(self.doc)
        return owner_by_market.get(market, NotAvailable)

    def get_connection_id(self):
        """Extract the digits assigned to NPC.utilisateur.ccptea in the HTML."""
        return Regexp(
            CleanText('//script[contains(text(), "NPC.utilisateur.ccptea")]'),
            r"NPC.utilisateur.ccptea = '(\d+)';",
        )(self.html_doc)

    def has_main_account(self):
        """Return the 'comptePrincipal' entry, or None when absent."""
        return Dict('comptePrincipal', default=None)(self.doc)

    @method
    class get_main_account(ItemElement):
        # Builds the Account described by the 'comptePrincipal' entry.
        klass = Account

        obj_id = CleanText(Dict('comptePrincipal/numeroCompte'))
        obj_number = CleanText(Dict('comptePrincipal/numeroCompte'))

        def obj_owner_type(self):
            return self.page.get_owner_type()

        def obj_label(self):
            product_label = CleanText(Dict('comptePrincipal/libelleProduit'))
            if Field('owner_type')(self) == AccountOwnerType.PRIVATE:
                # All the accounts have the same owner if it is private,
                # so adding the owner in the label is useless.
                return product_label(self)

            partner_label = CleanText(Dict('comptePrincipal/libellePartenaireBam'))
            return Format('%s %s', product_label, partner_label)(self)

        def obj_balance(self):
            raw_balance = Dict('comptePrincipal/solde', default=NotAvailable)(self)
            if empty(raw_balance):
                return NotAvailable
            return Eval(float_to_decimal, raw_balance)(self)

        obj_currency = CleanCurrency(Dict('comptePrincipal/idDevise'))
        obj__index = Dict('comptePrincipal/index')
        obj__category = Dict('comptePrincipal/grandeFamilleProduitCode', default=None)
        obj__id_element_contrat = CleanText(Dict('comptePrincipal/idElementContrat'))
        obj__fam_product_code = CleanText(Dict('comptePrincipal/codeFamilleProduitBam'))
        obj__fam_contract_code = CleanText(Dict('comptePrincipal/codeFamilleContratBam'))

        def obj_type(self):
            usual_label = CleanText(Dict('comptePrincipal/libelleUsuelProduit'))
            account_type = Map(usual_label, ACCOUNT_TYPES, Account.TYPE_UNKNOWN)(self)
            if account_type == Account.TYPE_UNKNOWN:
                self.logger.warning(
                    'We got an untyped account: please add "%s" to ACCOUNT_TYPES.',
                    CleanText(Dict('comptePrincipal/libelleUsuelProduit'))(self))
            return account_type

    def has_main_cards(self):
        """Return the 'comptePrincipal/cartesDD' entry, or None when absent."""
        return Dict('comptePrincipal/cartesDD', default=None)(self.doc)

    @method
    class iter_main_cards(DictElement):
        item_xpath = 'comptePrincipal/cartesDD'

        class item(ItemElement):
            # Main account cards are all deferred and their
            # coming is already displayed with a '-' sign.
            klass = Account

            def condition(self):
                situation = Dict('codeSituationCarte')(self)
                known_situations = (5, 7)
                if situation not in known_situations:
                    # Cards with codeSituationCarte equal to 7 are active
                    # and present on the website.
                    # Cards with codeSituationCarte equal to 5 are absent
                    # from the website, we skip them.
                    self.logger.warning(
                        'codeSituationCarte unknown, Check if the %s card is present on the website',
                        Field('id')(self))
                return situation != 5

            obj_id = CleanText(Dict('idCarte'), replace=[(' ', '')])
            obj_number = Field('id')
            obj_label = Format('Carte %s %s', Field('id'), CleanText(Dict('titulaire')))
            obj_type = Account.TYPE_CARD
            obj_coming = Eval(float_to_decimal, Dict('encoursCarteM'))
            obj_balance = Decimal(0)
            obj__index = Dict('index')
            obj__id_element_contrat = None

    @method
    class iter_accounts(DictElement):
        item_xpath = 'grandesFamilles/*/elementsContrats'

        class item(ItemElement):
            IGNORED_ACCOUNT_FAMILIES = (
                'MES ASSURANCES',
                'VOS ASSURANCES',
            )

            klass = Account

            def obj_id(self):
                # Loan/credit ids may be duplicated so we use the contract
                # number for now.
                contract_keyed_types = (
                    Account.TYPE_LOAN,
                    Account.TYPE_CONSUMER_CREDIT,
                    Account.TYPE_REVOLVING_CREDIT,
                )
                if Field('type')(self) in contract_keyed_types:
                    return CleanText(Dict('idElementContrat'))(self)
                return CleanText(Dict('numeroCompte'))(self)

            obj_number = CleanText(Dict('numeroCompte'))
            obj_currency = CleanCurrency(Dict('idDevise'))
            obj__index = Dict('index')
            obj__category = Coalesce(
                Dict('grandeFamilleProduitCode', default=None),
                Dict('sousFamilleProduit/niveau', default=None),
                default=None,
            )
            obj__id_element_contrat = CleanText(Dict('idElementContrat'))
            obj__fam_product_code = CleanText(Dict('codeFamilleProduitBam'))
            obj__fam_contract_code = CleanText(Dict('codeFamilleContratBam'))

            def obj_owner_type(self):
                return self.page.get_owner_type()

            def obj_label(self):
                product_label = CleanText(Dict('libelleProduit'))
                if Field('owner_type')(self) == AccountOwnerType.PRIVATE:
                    # All the accounts have the same owner if it is private,
                    # so adding the owner in the label is useless.
                    return product_label(self)

                partner_label = CleanText(Dict('libellePartenaireBam'))
                return Format('%s %s', product_label, partner_label)(self)

            def obj_type(self):
                usual_label = CleanText(Dict('libelleUsuelProduit'))(self)
                if usual_label in ('HABITATION', ):
                    # No need to log warning for "assurance" accounts
                    return NotAvailable

                account_type = Map(
                    CleanText(Dict('libelleUsuelProduit')),
                    ACCOUNT_TYPES,
                    Account.TYPE_UNKNOWN,
                )(self)
                if account_type == Account.TYPE_UNKNOWN:
                    self.logger.warning(
                        'There is an untyped account: please add "%s" to ACCOUNT_TYPES.',
                        CleanText(Dict('libelleUsuelProduit'))(self))
                return account_type

            def obj_balance(self):
                raw_balance = Dict('solde', default=None)(self)
                if not raw_balance:
                    # We will fetch the balance with account_details
                    return NotAvailable
                return Eval(float_to_decimal, raw_balance)(self)

            def condition(self):
                # Ignore insurances (plus they all have identical IDs)
                family = CleanText(Dict('familleProduit/libelle', default=''))(self)
                if family in self.IGNORED_ACCOUNT_FAMILIES:
                    return False

                # Ignore some credits not displayed on the website
                sub_family = CleanText(Dict('sousFamilleProduit/libelle', default=''))(self)
                if 'non affiche' in sub_family:
                    return False

                contract_status = CleanText(Dict('libelleSituationContrat', default=''))(self)
                return 'Inactif' not in contract_status
class AccountsPage(LoggedPage, JsonPage):
    """Accounts overview page: JSON data embedded in an HTML response."""

    def build_doc(self, content):
        """Return the accounts JSON embedded in the HTML response.

        The parsed HTML document is also stored in ``self.html_doc`` so
        that the number of spaces can be counted later.

        :param content: page content searched for the
            ``syntheseController.init(...)`` call.
        :return: the first JSON value decoded from the argument of that call.
        """
        # Store the HTML doc to count the number of spaces
        self.html_doc = HTMLPage(self.browser, self.response).doc

        # Transform the HTML tag containing the accounts list into a JSON.
        # The pattern is a raw string: "\." and "\(" are not valid escapes
        # in a normal string literal and trigger invalid-escape warnings.
        # NOTE(review): a page without this call makes re.search() return
        # None, so the .group(1) below fails with AttributeError.
        raw = re.search(r"syntheseController\.init\((.*)\)'>", content).group(1)

        # raw_decode() parses the first JSON value and ignores trailing text.
        # To debug the JSON accounts, dump the decoded value with json.dumps().
        d = json.JSONDecoder()
        return d.raw_decode(raw)[0]

    def count_spaces(self):
        """Return the total number of spaces.

        It is the number of available space choices plus the one we are on
        now. Some professional connections have a very specific xpath, so
        only nodes having both 'idBamIndex' and "HubAccounts-link--cael"
        are counted, otherwise there might be space duplicates.
        """
        return len(self.html_doc.xpath('//a[contains(@class, "HubAccounts-link--cael") and contains(@href, "idBamIndex=")]')) + 1

    def get_owner_type(self):
        """Map the 'marche' value to an owner type (NotAvailable if unknown)."""
        OWNER_TYPES = {
            'PARTICULIER': AccountOwnerType.PRIVATE,
            'PROFESSIONNEL': AccountOwnerType.ORGANIZATION,
            'AGRICULTEUR': AccountOwnerType.ORGANIZATION,
            'ASSOC_CA_MODERE': AccountOwnerType.ASSOCIATION,
        }
        return OWNER_TYPES.get(Dict('marche')(self.doc), NotAvailable)

    def get_connection_id(self):
        """Extract the digits assigned to NPC.utilisateur.ccptea in the HTML."""
        connection_id = Regexp(
            CleanText('//script[contains(text(), "NPC.utilisateur.ccptea")]'),
            r"NPC.utilisateur.ccptea = '(\d+)';"
        )(self.html_doc)
        return connection_id

    @method
    class get_main_account(ItemElement):
        # Builds the Account described by the 'comptePrincipal' entry.
        klass = Account

        obj_id = CleanText(Dict('comptePrincipal/numeroCompte'))
        obj_number = CleanText(Dict('comptePrincipal/numeroCompte'))
        obj_label = CleanText(Dict('comptePrincipal/libelleProduit'))

        def obj_balance(self):
            balance = Dict('comptePrincipal/solde', default=NotAvailable)(self)
            if not empty(balance):
                return Eval(float_to_decimal, balance)(self)
            return NotAvailable

        obj_currency = CleanCurrency(Dict('comptePrincipal/idDevise'))
        obj__index = Dict('comptePrincipal/index')
        obj__category = Dict('comptePrincipal/grandeFamilleProduitCode', default=None)
        obj__id_element_contrat = CleanText(Dict('comptePrincipal/idElementContrat'))
        obj__fam_product_code = CleanText(Dict('comptePrincipal/codeFamilleProduitBam'))
        obj__fam_contract_code = CleanText(Dict('comptePrincipal/codeFamilleContratBam'))

        def obj_type(self):
            _type = Map(CleanText(Dict('comptePrincipal/libelleUsuelProduit')), ACCOUNT_TYPES, Account.TYPE_UNKNOWN)(self)
            if _type == Account.TYPE_UNKNOWN:
                # Pass the label as a logging argument so formatting is
                # left to the logging framework.
                self.logger.warning(
                    'We got an untyped account: please add "%s" to ACCOUNT_TYPES.',
                    CleanText(Dict('comptePrincipal/libelleUsuelProduit'))(self))
            return _type

    @method
    class iter_accounts(DictElement):
        item_xpath = 'grandesFamilles/*/elementsContrats'

        class item(ItemElement):
            IGNORED_ACCOUNTS = ('MES ASSURANCES', 'VOS ASSURANCES',)

            klass = Account

            def obj_id(self):
                # Loan ids may be duplicated so we use the contract number for now:
                if Field('type')(self) == Account.TYPE_LOAN:
                    return CleanText(Dict('idElementContrat'))(self)
                return CleanText(Dict('numeroCompte'))(self)

            obj_number = CleanText(Dict('numeroCompte'))
            obj_label = CleanText(Dict('libelleProduit'))
            obj_currency = CleanCurrency(Dict('idDevise'))
            obj__index = Dict('index')
            obj__category = Dict('grandeFamilleProduitCode', default=None)
            obj__id_element_contrat = CleanText(Dict('idElementContrat'))
            obj__fam_product_code = CleanText(Dict('codeFamilleProduitBam'))
            obj__fam_contract_code = CleanText(Dict('codeFamilleContratBam'))

            def obj_type(self):
                if CleanText(Dict('libelleUsuelProduit'))(self) in ('HABITATION',):
                    # No need to log warning for "assurance" accounts
                    return NotAvailable
                _type = Map(CleanText(Dict('libelleUsuelProduit')), ACCOUNT_TYPES, Account.TYPE_UNKNOWN)(self)
                if _type == Account.TYPE_UNKNOWN:
                    # Pass the label as a logging argument so formatting is
                    # left to the logging framework.
                    self.logger.warning(
                        'There is an untyped account: please add "%s" to ACCOUNT_TYPES.',
                        CleanText(Dict('libelleUsuelProduit'))(self))
                return _type

            def obj_balance(self):
                balance = Dict('solde', default=None)(self)
                if balance:
                    return Eval(float_to_decimal, balance)(self)
                # We will fetch the balance with account_details
                return NotAvailable

            def condition(self):
                # Ignore insurances (plus they all have identical IDs)
                return CleanText(Dict('familleProduit/libelle', default=''))(self) not in self.IGNORED_ACCOUNTS