def get_video(self, video=None):
    """Build a YoujizzVideo from the current video page.

    :param video: an existing YoujizzVideo to fill; a new one is created
                  from the URL group ``id`` when None.
    :returns: the filled video object.
    :raises BrokenPageError: if the duration or the file URL cannot be found.
    """
    _id = to_unicode(self.group_dict['id'])
    if video is None:
        video = YoujizzVideo(_id)
    title_el = self.parser.select(self.document.getroot(), 'title', 1)
    video.title = to_unicode(title_el.text.strip())
    # youjizz HTML is crap, we must parse it with regexps
    data = lxml.html.tostring(self.document.getroot())
    m = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)</div>', data)
    if m:
        txt = m.group(1).strip()
        if txt == 'Unknown':
            video.duration = NotAvailable
        else:
            # Duration is formatted as "MM:SS".
            minutes, seconds = (int(v) for v in to_unicode(txt).split(':'))
            video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
    else:
        raise BrokenPageError('Unable to retrieve video duration')
    # The numeric id is the last dash-separated token of the slug.
    real_id = int(_id.split('-')[-1])
    data = self.browser.readurl('http://www.youjizz.com/videos/embed/%s' % real_id)
    video_file_urls = re.findall(
        r'"(http://[^",]+\.youjizz\.com[^",]+\.flv(?:\?[^"]*)?)"', data)
    if len(video_file_urls) == 0:
        raise BrokenPageError('Video URL not found')
    elif len(video_file_urls) > 1:
        raise BrokenPageError('Many video file URL found')
    else:
        video.url = to_unicode(video_file_urls[0])
    return video
def get_video(self, video=None):
    """Build a DailymotionVideo from the current page's flashvars.

    :param video: an existing DailymotionVideo to fill; a new one is
                  created from the URL group ``id`` when None.
    :returns: the filled video object.
    :raises BrokenPageError: when the video URL cannot be extracted.
    """
    if video is None:
        video = DailymotionVideo(self.group_dict['id'])
    div = self.parser.select(self.document.getroot(), 'div#content', 1)
    video.title = unicode(self.parser.select(div, 'span.title', 1).text).strip()
    video.author = unicode(self.parser.select(div, 'a.name, span.name, a[rel=author]', 1).text).strip()
    try:
        video.description = html2text(self.parser.tostring(self.parser.select(div, 'div#video_description', 1))).strip() or unicode()
    except BrokenPageError:
        video.description = u''
    for script in self.parser.select(self.document.getroot(), 'div.dmco_html'):
        # TODO support videos from anyclip, cf http://www.dailymotion.com/video/xkyjiv for example
        if 'id' in script.attrib and script.attrib['id'].startswith('container_player_') and \
           script.find('script') is not None:
            text = script.find('script').text
            mobj = re.search(r'\s*var flashvars = (.*)', text)
            if mobj is None:
                raise BrokenPageError('Unable to extract video url')
            flashvars = urllib.unquote(mobj.group(1))
            # Pick the best available quality, from highest to lowest.
            max_quality = None
            for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
                if key in flashvars:
                    max_quality = key
                    break
            # Previously an unknown flashvars layout raised a NameError on
            # max_quality below; fail with an explicit parse error instead.
            if max_quality is None:
                raise BrokenPageError('Unable to extract video url')
            mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars)
            if mobj is None:
                raise BrokenPageError('Unable to extract video url')
            video.url = urllib.unquote(mobj.group(1)).replace('\\/', '/')
    video.set_empty_fields(NotAvailable)
    return video
def get_session(self):
    """Extract the session token from the "FrameWork" frame URL.

    :returns: the session id string.
    :raises BrokenPageError: when the frame or the token is missing.
    """
    frames = self.document.xpath('//frame[@name="FrameWork"]')
    if not frames:
        raise BrokenPageError('Unable to find session token')
    match = re.search('sessionid=([^& "]+)', frames[0].attrib['src'])
    if match is None:
        raise BrokenPageError('Unable to find session token')
    return match.group(1)
def set_details(self, v):
    """Fill author, date and duration of video *v* from the page metadata.

    :raises BrokenPageError: when a date string cannot be parsed.
    """
    v.author = u'European Parliament'
    obj = self.parser.select(self.document.getroot(), 'meta[name=available]', 1)
    if obj is not None:
        value = obj.attrib['content']
        # Removed a leftover debug "print value" statement here.
        # Dates are formatted as "DD-MM-YYYY HH:MM".
        m = re.match('(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value)
        if not m:
            raise BrokenPageError('Unable to parse datetime: %r' % value)
        day = m.group(1)
        month = m.group(2)
        year = m.group(3)
        hour = m.group(4)
        minute = m.group(5)
        v.date = datetime.datetime(year=int(year),
                                   month=int(month),
                                   day=int(day),
                                   hour=int(hour),
                                   minute=int(minute))

    obj = self.parser.select(self.document.getroot(), 'span.ep_subtitle', 1)
    if obj is not None:
        span = self.parser.select(obj, 'span.ep_date', 1)
        value = span.text
        # The subtitle holds "HH:MM / HH:MM - DD-MM-YYYY" (begin/end times).
        m = re.match(
            '(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)',
            value)
        if not m:
            raise BrokenPageError('Unable to parse datetime: %r' % value)
        bhour = m.group(1)
        bminute = m.group(2)
        ehour = m.group(3)
        eminute = m.group(4)
        day = m.group(5)
        month = m.group(6)
        year = m.group(7)
        start = datetime.datetime(year=int(year), month=int(month), day=int(day),
                                  hour=int(bhour), minute=int(bminute))
        end = datetime.datetime(year=int(year), month=int(month), day=int(day),
                                hour=int(ehour), minute=int(eminute))
        # Duration is not given directly; derive it from begin/end times.
        v.duration = end - start
def iter_videos(self):
    """Yield InaVideo objects for each result of the search page.

    :raises BrokenPageError: when a duration cannot be parsed.
    """
    try:
        ul = self.parser.select(self.document.getroot(), 'div.container-videos ul', 1)
    except BrokenPageError:
        # It means there are no results.
        return
    for li in ul.findall('li'):
        # Extract the video id from the link target.
        id = re.sub(self.URL_REGEXP, r'\1', li.find('a').attrib['href'])
        video = InaVideo('boutique.%s' % id)
        video.thumbnail = Thumbnail(u'http://boutique.ina.fr%s' % li.find('a').find('img').attrib['src'])
        video.title = unicode(self.parser.select(li, 'p.titre', 1).text)
        # Dates are formatted as "DD/MM/YYYY".
        date = self.parser.select(li, 'p.date', 1).text
        day, month, year = [int(s) for s in date.split('/')]
        video.date = datetime.datetime(year, month, day)
        # Durations look like "1h2min3s"; hours and minutes are optional.
        duration = self.parser.select(li, 'p.duree', 1).text
        m = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration)
        if m:
            video.duration = datetime.timedelta(hours=int(m.group(2) or 0),
                                                minutes=int(m.group(4) or 0),
                                                seconds=int(m.group(5)))
        else:
            raise BrokenPageError('Unable to match duration (%r)' % duration)
        yield video
def login(self):
    """Log in on the website in two steps (identifier, then secret code).

    :raises BrowserIncorrectPassword: on authentication failure.
    :raises BrokenPageError: when the final session token cannot be found.
    """
    assert isinstance(self.username, basestring)
    assert isinstance(self.password, basestring)
    assert self.password.isdigit()

    # First step: send the user identifier to obtain a session id.
    data = {'Ident': self.username}
    r = self.readurl('https://client.hsbc.fr/cgi-bin/emcgi?Appl=WEBACC', urllib.urlencode(data), if_fail='raise')
    m = re.search('sessionid=([^ "]+)', r, flags=re.MULTILINE)
    if not m:
        raise BrowserIncorrectPassword()

    self._session = m.group(1)

    # Second step: send the secret code on that session.
    data = {'Secret': self.password}
    r = self.readurl('https://client.hsbc.fr/cgi-bin/emcgi?sessionid=%s' % self._session, urllib.urlencode(data))
    if r.find('Erreur Identification') >= 0:
        raise BrowserIncorrectPassword()

    # The next session token is embedded in a javascript redirect URL.
    m = re.search('url = "/cgi-bin/emcgi\?sessionid=([^& "]+)&debr="', r, flags=re.MULTILINE)
    if not m:
        raise BrokenPageError('Unable to find session token')
    self._session = m.group(1)
def iter_videos(self):
    """Yield YoujizzVideo objects for each thumbnail on the page.

    :raises BrokenPageError: when a video duration cannot be parsed.
    """
    span_list = self.parser.select(self.document.getroot(), 'span#miniatura')
    for span in span_list:
        a = self.parser.select(span, 'a', 1)
        url = a.attrib['href']
        # The id is the path component between /videos/ and .html.
        _id = re.sub(r'/videos/(.+)\.html', r'\1', url)
        video = YoujizzVideo(_id)
        video.thumbnail = Thumbnail(
            unicode(span.find('.//img').attrib['src']))
        title_el = self.parser.select(span, 'span#title1', 1)
        video.title = to_unicode(title_el.text.strip())
        # Some thumbnails use ';' in place of ':' in the duration.
        time_span = self.parser.select(span, 'span.thumbtime span', 1)
        time_txt = time_span.text.strip().replace(';', ':')
        if time_txt == 'N/A':
            minutes, seconds = 0, 0
        elif ':' in time_txt:
            minutes, seconds = (int(v) for v in time_txt.split(':'))
        else:
            raise BrokenPageError(
                'Unable to parse the video duration: %s' % time_txt)
        video.duration = datetime.timedelta(minutes=minutes, seconds=seconds)
        yield video
def get_accounts_list(self):
    """Reach the accounts list page and return the accounts.

    Several task ids are tried in turn, as the right one depends on the
    kind of subscription (personal, professional, ...).

    :raises BrokenPageError: when no task leads to the accounts page.
    """
    self.location(
        self.buildurl('/cyber/internet/StartTask.do', taskInfoOID='mesComptes', token=self.token))
    if self.page.is_error():
        self.location(
            self.buildurl('/cyber/internet/StartTask.do', taskInfoOID='mesComptesPRO', token=self.token))
    if self.page.is_error():
        self.location(
            self.buildurl('/cyber/internet/StartTask.do', taskInfoOID='maSyntheseGratuite', token=self.token))
    if self.page.is_error():
        self.location(
            self.buildurl('/cyber/internet/StartTask.do', taskInfoOID='accueilSynthese', token=self.token))
    if self.page.is_error():
        raise BrokenPageError('Unable to go on the accounts list page')

    if self.page.is_short_list():
        # Ask for the full list of accounts instead of the short one.
        self.select_form(nr=0)
        self.set_all_readonly(False)
        self['dialogActionPerformed'] = 'EQUIPEMENT_COMPLET'
        self.submit()

    self.token = self.page.get_token()
    return self.page.get_list()
def get_history(self):
    """Iterate over the transactions of the detail table.

    :raises BrokenPageError: when neither a detail table nor the
                             "no result" cell can be found.
    """
    # The table id varies between pages; try each known variant.
    tables = self.document.xpath('//table[@id="table-detail-operation"]')
    if len(tables) == 0:
        tables = self.document.xpath('//table[@id="table-detail"]')
    if len(tables) == 0:
        tables = self.document.getroot().cssselect('table.table-detail')
    if len(tables) == 0:
        try:
            self.parser.select(self.document.getroot(), 'td.no-result', 1)
        except BrokenPageError:
            raise BrokenPageError('Unable to find table?')
        else:
            # There simply are no transactions.
            return

    for tr in tables[0].xpath('.//tr'):
        tds = tr.findall('td')
        if len(tds) < 4:
            # Not a transaction row (header or filler).
            continue
        t = Transaction(0)
        date = u''.join([txt.strip() for txt in tds[self.COL_DATE].itertext()])
        raw = u''.join([txt.strip() for txt in tds[self.COL_TEXT].itertext()])
        debit = u''.join([txt.strip() for txt in tds[self.COL_DEBIT].itertext()])
        credit = u''.join([txt.strip() for txt in tds[self.COL_CREDIT].itertext()])
        t.parse(date, re.sub(r'[ ]+', ' ', raw))
        t.set_amount(credit, debit)
        yield t
def get_video_url(self, format=38):
    """Return (url, extension) of the best available stream.

    :param format: preferred itag number; lower-quality formats are tried
                   when it is not available.
    :raises BrokenPageError: when no playable format is found.
    """
    formats = {}
    for script in self.parser.select(self.document.getroot(), 'script'):
        text = script.text
        if not text:
            continue
        pattern = "yt.playerConfig = "
        pos = text.find(pattern)
        if pos < 0:
            continue
        # Extract the JSON object assigned to yt.playerConfig (one line).
        sub = text[pos+len(pattern):pos+text[pos:].find('\n')].rstrip(';')
        a = json.loads(sub)
        for part in a['args']['url_encoded_fmt_stream_map'].split(','):
            args = dict(parse_qsl(part))
            formats[int(args['itag'])] = args['url'] + '&signature=' + args['sig']
        break
    # choose the better format to use.
    for format in self.AVAILABLE_FORMATS[self.AVAILABLE_FORMATS.index(format):]:
        if format in formats:
            url = formats.get(format)
            ext = self.FORMAT_EXTENSIONS.get(format, 'flv')
            return url, ext
    raise BrokenPageError('Unable to find file URL')
def set_details(self, v):
    """Fill duration, author, rating and date of video *v* from the
    details list.

    :raises BrokenPageError: when the duration cannot be parsed.
    """
    for li in self.parser.select(self.document.getroot(), 'ul.spaced li'):
        span = li.find('label')
        name = span.text.strip()
        value = span.tail.strip()
        if name == 'Duration:':
            # Durations look like "1hrs 2min 3sec"; each part is optional.
            m = re.match('((\d+)hrs)?\s*((\d+)min)?\s*((\d+)sec)?', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            hours = m.group(2) or 0
            minutes = m.group(4) or 0
            seconds = m.group(6) or 0
            v.duration = datetime.timedelta(hours=int(hours),
                                            minutes=int(minutes),
                                            seconds=int(seconds))
        elif name == 'Submitted:':
            # The author may be in an <i> or an <a> tag, or plain text.
            author = li.find('i')
            if author is None:
                author = li.find('a')
            if author is None:
                v.author = unicode(value)
            else:
                v.author = unicode(author.text)
        elif name == 'Rating:':
            value = li.find('span').text
            v.rating = int(value.rstrip('%'))
            v.rating_max = 100
        elif name == 'Date:':
            v.date = parse_dt(value)
def iter_station_departures(self, station_id, arrival_id=None):
    """Iterate over next departures of a station.

    :param station_id: identifier of the station on the canaltp widget.
    :param arrival_id: unused; kept for interface compatibility.
    :returns: an iterator of dicts with type/time/departure/arrival/late
              /late_reason keys.
    :raises BrokenPageError: when the answer cannot be parsed.
    """
    url = u'http://widget.canaltp.fr/Prochains_departs_15122009/dev/index.php?gare=%s' % unicode(
        station_id)
    result = self.openurl(url.encode('utf-8')).read()
    # Removed a pointless "result = result" self-assignment.
    departure = ''
    # The answer is an urlencoded-like list of key=value pairs.
    for line in result.split('&'):
        if '=' not in line:
            raise BrokenPageError('Unable to parse result: %s' % line)

        key, value = line.split('=', 1)
        if key == 'nomgare':
            departure = value
        elif key.startswith('ligne'):
            # Each "ligneN" value packs 7 ';'-separated fields.
            _type, unknown, _time, arrival, served, late, late_reason = value.split(
                ';', 6)
            yield {
                'type': to_unicode(_type),
                'time': datetime.combine(date.today(), time(*[int(x) for x in _time.split(':')])),
                'departure': to_unicode(departure),
                'arrival': to_unicode(arrival).strip(),
                # "late" looks like "N min"; expose the delay as a time().
                'late': late and time(0, int(late.split()[0])) or time(),
                'late_reason': to_unicode(late_reason).replace('\n', '').strip()
            }
def get_history(self):
    """Iterate over transactions parsed from the page's javascript data.

    :raises BrokenPageError: when neither a transactions array nor the
                             "no transactions" panel can be found.
    """
    txt = self.get_from_js('ListeMvts_data = new Array(', ');')
    if txt is None:
        no_trans = self.get_from_js('js_noMvts = new Ext.Panel(', ')')
        if no_trans is not None:
            # there is no transactions for this account, this is normal.
            return
        else:
            raise BrokenPageError(
                'Unable to find transactions list in scripts')

    # Normalize the javascript single-quoted array into valid JSON.
    data = json.loads('[%s]' % txt.replace('"', '\\"').replace("'", '"'))

    for line in data:
        t = Transaction(line[self.COL_ID])

        if self.is_coming is not None:
            t.type = t.TYPE_CARD
            date = self.parser.strip(line[self.COL_DEBIT_DATE])
        else:
            date = self.parser.strip(line[self.COL_DATE])
        raw = self.parser.strip(line[self.COL_LABEL])

        t.parse(date, raw)
        t.set_amount(line[self.COL_VALUE])

        if t.date is NotAvailable:
            continue
        if self.set_coming(t):
            continue
        yield t
def get_date_and_duration(self):
    """Return (date, duration) parsed from the product header.

    :raises BrokenPageError: when the date element is missing.
    """
    els = self.document.getroot().cssselect(
        'div.bloc-produit-haut p.date')
    # The previous code indexed [0] before testing for None, so a missing
    # element raised IndexError and the BrokenPageError branch was dead.
    if not els:
        raise BrokenPageError('Unable to find date and duration element')
    return self.parse_date_and_duration(els[0].text.strip())
def get_history(self, date_guesser):
    """Iterate over card transactions of the history table.

    The first row is the card balance; it is emitted as a negated
    transaction dated from the balance label itself.

    :param date_guesser: helper resolving day/month pairs to full dates.
    :raises BrokenPageError: when the balance line cannot be parsed.
    """
    seen = set()
    lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr')
    for line in lines[1:]:  # first line is balance
        is_balance = line.xpath('./td/@class="cel-texte cel-neg"')
        [date, label, _, amount] = [self.parser.tocleanstring(td) for td in line.xpath('./td')]
        t = Transaction(0)
        t.set_amount(amount)
        t.label = t.raw = label
        if is_balance:
            m = re.search('(\d+ [^ ]+ \d+)', label)
            if not m:
                raise BrokenPageError(
                    'Unable to read card balance in history: %r' % label)
            t.date = parse_french_date(m.group(1))
            # The balance is a debit of the card: negate it.
            t.amount = -t.amount
        else:
            day, month = map(int, date.split('/', 1))
            t.date = date_guesser.guess_date(day, month)
            t.type = t.TYPE_CARD
            t.rdate = t.date
        # Removed leftover debug prints on UnicodeEncodeError; the
        # exception from unique_id() still propagates unchanged.
        t.id = t.unique_id(seen)
        yield t
def get_messages_link(self):
    """
    Get the link to the messages page, which seems to have an identifier in it.
    """
    anchors = self.parser.select(self.document.getroot(), 'div#pantalon div.interieur a')
    for anchor in anchors:
        href = anchor.attrib.get('href', '')
        if 'MessagesRecus' in href:
            return anchor.attrib['href']
    raise BrokenPageError('Unable to find the link to the messages page')
def login3(self, passwd):
    """Fill the confirmation code and submit the validation form.

    :param passwd: the confirmation code to send.
    :raises BrokenPageError: when the redirect URL cannot be extracted.
    """
    self.browser.select_form(name='Main')
    self.browser['codconf'] = passwd
    link = self.document.xpath('//a[@title="Valider"]')[0]
    # The real target URL is hidden inside a javascript redirect.
    match = re.match("javascript:RedirectToDeiPart\('([^']+)'\);", link.attrib['href'])
    if match is None:
        raise BrokenPageError('Unable to find validate URL')
    self.browser.form.action = match.group(1)
    self.browser.submit(nologin=True)
def iter_videos(self):
    """Yield GDCVaultVideo objects built from the page's JSON payload.

    :raises BrokenPageError: when the JSON data is absent.
    """
    if self.document is None or self.document['data'] is None:
        raise BrokenPageError('Unable to find JSON data')
    # TODO: split type 4 videos into id and id#slides
    for entry in self.document['data']:
        video = GDCVaultVideo.get_video_from_json(entry)
        if video is not None:
            yield video
def iter_videos(self):
    """Yield DailymotionVideo objects for each video item on the page.

    :raises BrokenPageError: when a duration cannot be parsed.
    """
    for div in self.parser.select(self.document.getroot(), 'div.dmpi_video_item'):
        _id = div.attrib.get('data-id', None)
        if _id is None:
            self.browser.logger.warning('Unable to find the ID of a video')
            continue
        video = DailymotionVideo(_id)
        video.title = unicode(self.parser.select(div, 'h3 a', 1).text).strip()
        video.author = unicode(
            self.parser.select(div, 'div.dmpi_user_login', 1).find('a').find('span').text).strip()
        video.description = html2text(
            self.parser.tostring(
                self.parser.select(div, 'div.dmpi_video_description', 1))).strip() or unicode()
        try:
            parts = self.parser.select(div, 'div.duration', 1).text.split(':')
        except BrokenPageError:
            # it's probably a live, np.
            video.duration = NotAvailable
        else:
            # Durations are "S", "M:S" or "H:M:S".
            if len(parts) == 1:
                seconds = parts[0]
                hours = minutes = 0
            elif len(parts) == 2:
                minutes, seconds = parts
                hours = 0
            elif len(parts) == 3:
                hours, minutes, seconds = parts
            else:
                raise BrokenPageError(
                    'Unable to parse duration %r' % self.parser.select(div, 'div.duration', 1).text)
            video.duration = datetime.timedelta(hours=int(hours),
                                                minutes=int(minutes),
                                                seconds=int(seconds))
        url = unicode(
            self.parser.select(div, 'img.dmco_image', 1).attrib['data-src'])
        # remove the useless anti-caching
        url = re.sub('\?\d+', '', url)
        # use the bigger thumbnail
        url = url.replace('jpeg_preview_medium.jpg', 'jpeg_preview_large.jpg')
        video.thumbnail = Thumbnail(unicode(url))
        rating_div = self.parser.select(div, 'div.small_stars', 1)
        video.rating_max = self.get_rate(rating_div)
        video.rating = self.get_rate(rating_div.find('div'))
        video.set_empty_fields(NotAvailable, ('url', ))
        yield video
def get_video(self, video=None):
    """Build a DailymotionVideo using the embed page's JSON metadata.

    :param video: an existing DailymotionVideo to fill; a new one is
                  created from the URL group ``id`` when None.
    :returns: the filled video object.
    :raises BrokenPageError: when the video info or its URL is not found.
    """
    if video is None:
        video = DailymotionVideo(self.group_dict['id'])
    div = self.parser.select(self.document.getroot(), 'div#content', 1)
    video.title = unicode(self.parser.select(div, 'span.title', 1).text).strip()
    video.author = unicode(
        self.parser.select(div, 'a.name, span.name, a[rel=author]', 1).text).strip()
    try:
        video.description = html2text(
            self.parser.tostring(
                self.parser.select(div, 'div#video_description', 1))).strip() or unicode()
    except BrokenPageError:
        video.description = u''
    # The stream URLs are only available on the embed player page.
    embed_page = self.browser.readurl(
        'http://www.dailymotion.com/embed/video/%s' % video.id)
    m = re.search('var info = ({.*?}),[^{"]', embed_page)
    if not m:
        raise BrokenPageError('Unable to find information about video')

    info = json.loads(m.group(1))
    # Try qualities from best to worst.
    for key in [
            'stream_h264_hd1080_url', 'stream_h264_hd_url',
            'stream_h264_hq_url', 'stream_h264_url', 'stream_h264_ld_url'
    ]:
        if info.get(key):  # key in info and info[key]:
            max_quality = key
            break
    else:
        raise BrokenPageError(u'Unable to extract video URL')
    video.url = info[max_quality]

    video.set_empty_fields(NotAvailable)
    return video
def parse_date_and_duration(self, text):
    """Parse a "[prefix - ]DD/MM/YYYY - [Hh][Mmin]Ss" string.

    :returns: a (datetime, timedelta) tuple.
    :raises BrokenPageError: when *text* does not match the format.
    """
    pattern = re.compile('(.* - )?(.+) - ((.+)h)?((.+)min)?(.+)s')
    match = pattern.match(text)
    if match is None:
        raise BrokenPageError('Unable to parse date and duration')
    # Hours and minutes are optional; a matched group is never empty.
    day, month, year = map(int, match.group(2).split('/'))
    hours = match.group(4) or 0
    minutes = match.group(6) or 0
    when = datetime.datetime(year, month, day)
    length = datetime.timedelta(hours=int(hours),
                                minutes=int(minutes),
                                seconds=int(match.group(7)))
    return when, length
def get_url(self):
    """Return (url, extension) of the first download link.

    :raises BrokenPageError: when no download entry is present.
    """
    items = self.parser.select(self.document.getroot(), 'ul.downloadList li')
    if not items:
        raise BrokenPageError('Unable to find file URL')
    link = self.parser.select(items[0], 'a', 1)
    # The link text starts with the format name, e.g. "MP4 - ...".
    match = re.match('^(\w+) - .*', link.text)
    ext = match.group(1).lower() if match else u'flv'
    return unicode(link.attrib['href']), unicode(ext)
def iter_videos(self):
    """Yield CappedVideo objects for each search result.

    :raises BrokenPageError: when a duration cannot be parsed.
    """
    # When no results are found, the website returns random results
    sb = self.parser.select(self.document.getroot(), 'div.search form input.searchbox', 1)
    if sb.value == 'No Results Found':
        return

    #Extracting meta data from results page
    vidbackdrop_list = self.parser.select(self.document.getroot(), 'div.vidBackdrop ')
    for vidbackdrop in vidbackdrop_list:
        url = self.parser.select(vidbackdrop, 'a', 1).attrib['href']
        # Strip the leading "./" to get the id.
        _id = url[2:]
        video = CappedVideo(_id)
        video.set_empty_fields(NotAvailable, ('url', ))

        video.title = to_unicode(
            self.parser.select(vidbackdrop, 'div.vidTitle a', 1).text)
        video.author = to_unicode(
            self.parser.select(vidbackdrop, 'div.vidAuthor a', 1).text)

        thumbnail_url = 'http://cdn.capped.tv/pre/%s.png' % _id
        video.thumbnail = Thumbnail(thumbnail_url)

        #we get the description field
        duration_tmp = self.parser.select(vidbackdrop, 'div.vidInfo', 1)
        #we remove tabs and spaces
        duration_tmp2 = duration_tmp.text[7:]
        #we remove all fields exept time
        duration_tmp3 = duration_tmp2.split(' ')[0]
        #we transform it in datetime format
        parts = duration_tmp3.split(':')
        if len(parts) == 1:
            hours = minutes = 0
            seconds = parts[0]
        elif len(parts) == 2:
            hours = 0
            minutes, seconds = parts
        elif len(parts) == 3:
            hours, minutes, seconds = parts
        else:
            raise BrokenPageError('Unable to parse duration %r' % duration_tmp)

        video.duration = datetime.timedelta(hours=int(hours),
                                            minutes=int(minutes),
                                            seconds=int(seconds))
        yield video
def get_list(self):
    """Parse and return the accounts of the summary table.

    Card accounts ("CARTE_" links) are not returned separately: their
    links and balances are aggregated into the first account.

    :returns: an iterator of Account objects.
    :raises BrokenPageError: when a balance cannot be parsed.
    """
    accounts = []

    for tr in self.document.getiterator('tr'):
        if not 'LGNTableRow' in tr.attrib.get('class', '').split():
            continue
        account = Account()
        for td in tr.getiterator('td'):
            if td.attrib.get('headers', '') == 'TypeCompte':
                a = td.find('a')
                if a is None:
                    break
                account.label = unicode(a.find("span").text)
                account._link_id = a.get('href', '')
            elif td.attrib.get('headers', '') == 'NumeroCompte':
                # Strip non-breaking spaces from the account number.
                id = td.text
                id = id.replace(u'\xa0', '')
                account.id = id
            elif td.attrib.get('headers', '') == 'Libelle':
                pass
            elif td.attrib.get('headers', '') == 'Solde':
                div = td.xpath('./div[@class="Solde"]')
                if len(div) > 0:
                    balance = self.parser.tocleanstring(div[0])
                    if len(balance) > 0 and balance not in ('ANNULEE', 'OPPOSITION'):
                        try:
                            account.balance = Decimal(FrenchTransaction.clean_amount(balance))
                        except InvalidOperation:
                            raise BrokenPageError('Unable to parse balance %r' % balance)
                        account.currency = account.get_currency(balance)
                    else:
                        account.balance = NotAvailable

        if not account.label or empty(account.balance):
            continue

        if 'CARTE_' in account._link_id:
            # A card: attach it to the main account instead of listing it.
            ac = accounts[0]
            ac._card_links.append(account._link_id)
            if not ac.coming:
                ac.coming = Decimal('0.0')
            ac.coming += account.balance
        else:
            account._card_links = []
            accounts.append(account)

    return iter(accounts)
def recap(self):
    """Parse the transfer confirmation page into a Transfer object.

    :returns: the parsed Transfer.
    :raises BrokenPageError: when the success message is missing.
    """
    if len(self.document.xpath('//p[@class="alert alert-success"]')) == 0:
        raise BrokenPageError('Unable to find confirmation')
    div = self.document.find(
        '//div[@class="encadre transfert-validation"]')
    transfer = Transfer(0)
    transfer.amount = Decimal(FrenchTransaction.clean_amount(
        div.xpath('.//label[@id="confirmtransferAmount"]')[0].text))
    transfer.origin = div.xpath(
        './/span[@id="confirmfromAccount"]')[0].text
    transfer.recipient = div.xpath(
        './/span[@id="confirmtoAccount"]')[0].text
    transfer.reason = unicode(
        div.xpath('.//span[@id="confirmtransferMotive"]')[0].text)
    return transfer
def fill_paste(self, paste):
    """Fill *paste* (title, contents, visibility) from the paste page.

    :returns: the filled paste object.
    :raises BrokenPageError: when the visibility cannot be determined.
    """
    header = self.parser.select(self.document.getroot(), 'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath')
    paste.title = unicode(self.parser.select(header, '//div[@class="paste_box_line1"]//h1', 1, 'xpath').text)
    paste.contents = unicode(self.parser.select(self.document.getroot(), '//textarea[@id="paste_code"]', 1, 'xpath').text)
    # The visibility is only exposed through the title of an icon.
    visibility_text = self.parser.select(header, '//div[@class="paste_box_line1"]//img', 1, 'xpath').attrib['title']
    if visibility_text.startswith('Public'):
        paste.public = True
    elif visibility_text.startswith('Unlisted') or visibility_text.startswith('Private'):
        paste.public = False
    else:
        raise BrokenPageError('Unable to get the paste visibility')
    return paste
def get_history(self, date_guesser):
    """Iterate over card transactions, using the balance line of the
    table as the debit date of the following entries.

    :param date_guesser: helper resolving day/month pairs to full dates.
    :raises BrokenPageError: when the balance line cannot be parsed.
    """
    seen = set()
    lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr')
    debit_date = None
    for i, line in enumerate(lines):
        is_balance = line.xpath('./td/@class="cel-texte cel-neg"')

        # It is possible to have three or four columns.
        cols = [self.parser.tocleanstring(td) for td in line.xpath('./td')]
        date = cols[0]
        label = cols[1]
        amount = cols[-1]

        t = Transaction(i)
        t.set_amount(amount)
        t.label = t.raw = label

        if is_balance:
            m = re.search('(\d+ [^ ]+ \d+)', label)
            if not m:
                raise BrokenPageError(
                    'Unable to read card balance in history: %r' % label)
            debit_date = parse_french_date(m.group(1))

            # Skip the first line because it is balance
            if i == 0:
                continue

            t.date = t.rdate = debit_date
            # Consider the second one as a positive amount to reset balance to 0.
            t.amount = -t.amount
        else:
            day, month = map(int, date.split('/', 1))
            t.rdate = date_guesser.guess_date(day, month)
            t.date = debit_date
            t.type = t.TYPE_CARD

        # Removed leftover debug prints on UnicodeEncodeError; the
        # exception from unique_id() still propagates unchanged.
        t.id = t.unique_id(seen)
        yield t
def iter_videos(self):
    """Yield ArteVideo objects for each video block of the page."""
    videos = self.document.getroot().cssselect("div[class=video]")
    for div in videos:
        title = div.find('h2').find('a').text
        # The id is the slug part of the localized video URL.
        m = re.match(r'/(fr|de|en)/videos/(.*)\.html',
                     div.find('h2').find('a').attrib['href'])
        _id = ''
        if m:
            _id = m.group(2)
        # Rating is the number of "on" stars out of the total star count.
        rating = rating_max = 0
        rates = self.parser.select(div, 'div[class=rateContainer]', 1)
        for r in rates.findall('div'):
            if 'star-rating-on' in r.attrib['class']:
                rating += 1
            rating_max += 1
        video = ArteVideo(_id)
        video.title = unicode(title)
        video.rating = rating
        video.rating_max = rating_max
        thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
        video.thumbnail = Thumbnail(u'http://videos.arte.tv' + thumb.attrib['src'])
        try:
            parts = self.parser.select(div, 'div.duration_thumbnail', 1).text.split(':')
            if len(parts) == 2:
                hours = 0
                minutes, seconds = parts
            elif len(parts) == 3:
                hours, minutes, seconds = parts
            else:
                raise BrokenPageError('Unable to parse duration %r' % parts)
        except BrokenPageError:
            # No (or unparseable) duration block: leave the field unset.
            pass
        else:
            video.duration = datetime.timedelta(hours=int(hours),
                                                minutes=int(minutes),
                                                seconds=int(seconds))
        video.set_empty_fields(NotAvailable, ('url', ))
        yield video
def get_list(self):
    """Parse and return the accounts list from the page's javascript data.

    Visa card lines are merged into the previous account's coming amount
    instead of being returned on their own.

    :returns: an iterator of Account objects.
    :raises BrokenPageError: when the accounts array is not found.
    """
    accounts = []

    txt = self.get_from_js('_data = new Array(', ');', is_list=True)
    if txt is None:
        raise BrokenPageError('Unable to find accounts list in scripts')

    # Normalize the javascript single-quoted array into valid JSON.
    data = json.loads('[%s]' % txt.replace("'", '"'))

    for line in data:
        a = Account()
        a.id = line[self.COL_ID].replace(' ', '')
        # The label is an HTML snippet; parse it to extract clean text.
        fp = StringIO(
            unicode(line[self.COL_LABEL]).encode(self.browser.ENCODING))
        a.label = self.parser.tocleanstring(
            self.parser.parse(fp, self.browser.ENCODING).xpath(
                '//div[@class="libelleCompteTDB"]')[0])
        a.balance = Decimal(
            FrenchTransaction.clean_amount(line[self.COL_BALANCE]))
        a._link = self.get_history_link()
        if line[self.COL_HISTORY] == 'true':
            a._args = {
                '_eventId': 'clicDetailCompte',
                '_ipc_eventValue': '',
                '_ipc_fireEvent': '',
                'deviseAffichee': 'DEVISE',
                'execution': self.get_execution(),
                'idCompteClique': line[self.COL_ID],
            }
        else:
            a._args = None
        if a.id.find('_CarteVisa') >= 0:
            # A card: aggregate it into the previous account.
            accounts[-1]._card_ids.append(a._args)
            if not accounts[-1].coming:
                accounts[-1].coming = Decimal('0.0')
            accounts[-1].coming += a.balance
            continue

        a._card_ids = []
        accounts.append(a)

    return iter(accounts)
def confirm(self, password):
    """Type the transfer confirmation code on the virtual keyboard and
    submit the confirmation form.

    :param password: the full password; only the digits whose cell is
                     displayed as empty ("vide") are actually typed.
    :raises BrokenPageError: when the confirmation form is missing.
    """
    try:
        vk = INGVirtKeyboard(self)
    except VirtKeyboardError as err:
        error("Error: %s" % err)
        return

    # Only the password positions marked with a "vide" cell are asked.
    realpasswd = ""
    span = self.document.find('//span[@id="digitpadtransfer"]')
    i = 0
    for font in span.getiterator('font'):
        if font.attrib.get('class') == "vide":
            realpasswd += password[i]
        i += 1
    confirmform = None
    # The form has an autogenerated "j_id..." name and no enctype.
    for form in self.document.xpath('//form'):
        try:
            if form.attrib['name'][
                    0:4] == "j_id" and 'enctype' not in form.attrib:
                confirmform = form
                break
        except:
            continue
    if confirmform is None:
        raise BrokenPageError('Unable to find confirm form')
    formname = confirmform.attrib['name']
    self.browser.logger.debug('We are looking for : ' + realpasswd)
    self.browser.select_form(formname)
    self.browser.set_all_readonly(False)
    # Drop AJAX pseudo-controls that must not be submitted.
    for a in self.browser.controls[:]:
        if "_link_hidden_" in str(a) or "j_idcl" in str(a):
            self.browser.controls.remove(a)
    coordinates = vk.get_string_code(realpasswd)
    self.browser.logger.debug("Coordonates: " + coordinates)
    self.browser.controls.append(
        ClientForm.TextControl('text', 'AJAXREQUEST', {'value': '_viewRoot'}))
    self.browser.controls.append(
        ClientForm.TextControl('text', '%s:mrgtransfer' % formname,
                               {'value': '%s:mrgtransfer' % formname}))
    self.browser['%s:mrltransfer' % formname] = coordinates
    self.browser.submit(nologin=True)