Example #1
0
    def get_video(self, video=None):
        """Populate a video object from the current video page.

        :param video: existing video to fill; when None, a new YoujizzVideo
                      is created from the ``id`` group of the page URL.
        :returns: the video with its title, duration and url set.
        :raises BrokenPageError: if the runtime cannot be located in the page,
                                 or if the embed page does not expose exactly
                                 one ``.flv`` URL.
        """
        video_id = to_unicode(self.group_dict['id'])
        if video is None:
            video = YoujizzVideo(video_id)

        root = self.document.getroot()
        video.title = to_unicode(
            self.parser.select(root, 'title', 1).text.strip())

        # The markup is not reliable enough for selectors, so the runtime is
        # extracted from the serialized page with a regular expression.
        html = lxml.html.tostring(root)
        runtime = re.search(r'<strong>.*?Runtime.*?</strong> (.+?)</div>',
                            html)
        if not runtime:
            raise BrokenPageError('Unable to retrieve video duration')

        runtime_txt = runtime.group(1).strip()
        if runtime_txt == 'Unknown':
            video.duration = NotAvailable
        else:
            minutes, seconds = [int(part)
                                for part in to_unicode(runtime_txt).split(':')]
            video.duration = datetime.timedelta(minutes=minutes,
                                                seconds=seconds)

        # The embed page is addressed by the integer found after the last
        # dash of the page id.
        embed_url = 'http://www.youjizz.com/videos/embed/%s' % int(
            video_id.split('-')[-1])
        embed_page = self.browser.readurl(embed_url)

        flv_urls = re.findall(
            r'"(http://[^",]+\.youjizz\.com[^",]+\.flv(?:\?[^"]*)?)"',
            embed_page)
        if not flv_urls:
            raise BrokenPageError('Video URL not found')
        if len(flv_urls) > 1:
            raise BrokenPageError('Many video file URL found')

        video.url = to_unicode(flv_urls[0])
        return video
Example #2
0
    def get_video(self, video=None):
        """Populate a video object from the current Dailymotion video page.

        The title, author and description are read from ``div#content``; the
        stream URL is extracted from the ``flashvars`` JavaScript variable of
        the player container, preferring the highest quality available.

        :param video: existing video to fill; when None, a new
                      DailymotionVideo is created from the ``id`` URL group.
        :returns: the filled video (unset fields become NotAvailable).
        :raises BrokenPageError: if a player script is found but no stream
                                 URL can be extracted from it.
        """
        if video is None:
            video = DailymotionVideo(self.group_dict['id'])

        div = self.parser.select(self.document.getroot(), 'div#content', 1)

        video.title = unicode(self.parser.select(div, 'span.title', 1).text).strip()
        video.author = unicode(self.parser.select(div, 'a.name, span.name, a[rel=author]', 1).text).strip()
        try:
            video.description = html2text(self.parser.tostring(self.parser.select(div, 'div#video_description', 1))).strip() or unicode()
        except BrokenPageError:
            # The description block is optional.
            video.description = u''

        for container in self.parser.select(self.document.getroot(), 'div.dmco_html'):
            # TODO support videos from anyclip, cf http://www.dailymotion.com/video/xkyjiv for example
            if not container.attrib.get('id', '').startswith('container_player_'):
                continue
            script = container.find('script')
            if script is None:
                continue

            # An empty <script> has text None; treat it as "no flashvars".
            mobj = re.search(r'\s*var flashvars = (.*)', script.text or '')
            if mobj is None:
                raise BrokenPageError('Unable to extract video url')
            flashvars = urllib.unquote(mobj.group(1))

            # Keys are ordered from best to worst quality.
            for key in ['hd1080URL', 'hd720URL', 'hqURL', 'sdURL', 'ldURL', 'video_url']:
                if key in flashvars:
                    max_quality = key
                    break
            else:
                # Without this branch max_quality would be unbound below.
                raise BrokenPageError('Unable to extract video url')

            mobj = re.search(r'"' + max_quality + r'":"(.+?)"', flashvars)
            if mobj is None:
                raise BrokenPageError('Unable to extract video url')
            video.url = urllib.unquote(mobj.group(1)).replace('\\/', '/')

        video.set_empty_fields(NotAvailable)

        return video
Example #3
0
    def get_session(self):
        """Return the session id found in the "FrameWork" frame URL.

        :raises BrokenPageError: if the frame is absent or its ``src`` does
                                 not carry a ``sessionid=`` value.
        """
        frames = self.document.xpath('//frame[@name="FrameWork"]')
        if not frames:
            raise BrokenPageError('Unable to find session token')

        token = re.search('sessionid=([^& "]+)', frames[0].attrib['src'])
        if token is None:
            raise BrokenPageError('Unable to find session token')
        return token.group(1)
Example #4
0
    def set_details(self, v):
        """Fill the author, date and duration of *v* from the current page.

        The date comes from the ``content`` attribute of
        ``meta[name=available]`` (``DD-MM-YYYY HH:MM``). The duration is the
        difference between the begin and end times found in
        ``span.ep_subtitle span.ep_date``
        (``HH:MM / HH:MM - DD-MM-YYYY``).

        :param v: video object to update in place.
        :raises BrokenPageError: if either value does not match the expected
                                 format.
        """
        v.author = u'European Parliament'
        root = self.document.getroot()

        obj = self.parser.select(root, 'meta[name=available]', 1)
        if obj is not None:
            value = obj.attrib['content']
            m = re.match(r'(\d\d)-(\d\d)-(\d\d\d\d)\s*(\d\d):(\d\d)', value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            day, month, year, hour, minute = [int(g) for g in m.groups()]
            v.date = datetime.datetime(year=year,
                                       month=month,
                                       day=day,
                                       hour=hour,
                                       minute=minute)

        obj = self.parser.select(root, 'span.ep_subtitle', 1)
        if obj is not None:
            value = self.parser.select(obj, 'span.ep_date', 1).text
            m = re.match(
                r'(\d\d):(\d\d)\s*\/\s*(\d\d):(\d\d)\s*-\s*(\d\d)-(\d\d)-(\d\d\d\d)',
                value)
            if not m:
                raise BrokenPageError('Unable to parse datetime: %r' % value)
            bhour, bminute, ehour, eminute, day, month, year = [
                int(g) for g in m.groups()]

            start = datetime.datetime(year=year,
                                      month=month,
                                      day=day,
                                      hour=bhour,
                                      minute=bminute)
            end = datetime.datetime(year=year,
                                    month=month,
                                    day=day,
                                    hour=ehour,
                                    minute=eminute)
            # Only one date is given for both times: an end time earlier than
            # the start means the event ran past midnight.
            if end < start:
                end += datetime.timedelta(days=1)

            v.duration = end - start
Example #5
0
    def iter_videos(self):
        """Yield an InaVideo for each ``li`` of the results list.

        Nothing is yielded when the ``div.container-videos ul`` element is
        absent, which means the search returned no result.

        :raises BrokenPageError: if a duration does not look like
                                 ``[Nh][Nmin]Ns``.
        """
        try:
            listing = self.parser.select(self.document.getroot(),
                                         'div.container-videos ul', 1)
        except BrokenPageError:
            # It means there are no results.
            return

        for item in listing.findall('li'):
            link = item.find('a')
            video = InaVideo(
                'boutique.%s' % re.sub(self.URL_REGEXP, r'\1',
                                       link.attrib['href']))

            thumbnail_path = item.find('a').find('img').attrib['src']
            video.thumbnail = Thumbnail(
                u'http://boutique.ina.fr%s' % thumbnail_path)

            video.title = unicode(self.parser.select(item, 'p.titre', 1).text)

            # Dates are written day/month/year.
            date_txt = self.parser.select(item, 'p.date', 1).text
            day, month, year = [int(s) for s in date_txt.split('/')]
            video.date = datetime.datetime(year, month, day)

            duration = self.parser.select(item, 'p.duree', 1).text
            m = re.match(r'((\d+)h)?((\d+)min)?(\d+)s', duration)
            if not m:
                raise BrokenPageError('Unable to match duration (%r)' %
                                      duration)
            hours, minutes, seconds = m.group(2, 4, 5)
            video.duration = datetime.timedelta(hours=int(hours or 0),
                                                minutes=int(minutes or 0),
                                                seconds=int(seconds))

            yield video
Example #6
0
    def login(self):
        """Authenticate in two steps and store the session id.

        The identifier is posted first and the response provides a temporary
        session id. The secret is then posted using that id, and the final
        session id is read from the ``url = "..."`` value found in the second
        response. Both ids are stored in turn in ``self._session``.

        :raises BrowserIncorrectPassword: if the first response has no
            session id, or the second one contains 'Erreur Identification'.
        :raises BrokenPageError: if the final session id cannot be found.
        """
        assert isinstance(self.username, basestring)
        assert isinstance(self.password, basestring)
        assert self.password.isdigit()

        ident_page = self.readurl(
            'https://client.hsbc.fr/cgi-bin/emcgi?Appl=WEBACC',
            urllib.urlencode({'Ident': self.username}),
            if_fail='raise')
        first_token = re.search('sessionid=([^ "]+)', ident_page,
                                flags=re.MULTILINE)
        if first_token is None:
            raise BrowserIncorrectPassword()
        self._session = first_token.group(1)

        secret_url = ('https://client.hsbc.fr/cgi-bin/emcgi?sessionid=%s' %
                      self._session)
        secret_page = self.readurl(secret_url,
                                   urllib.urlencode({'Secret': self.password}))
        if secret_page.find('Erreur Identification') != -1:
            raise BrowserIncorrectPassword()

        final_token = re.search(
            r'url = "/cgi-bin/emcgi\?sessionid=([^& "]+)&debr="',
            secret_page,
            flags=re.MULTILINE)
        if final_token is None:
            raise BrokenPageError('Unable to find session token')
        self._session = final_token.group(1)
Example #7
0
    def iter_videos(self):
        """Yield a YoujizzVideo for each ``span#miniatura`` of the page.

        Each video gets its id (taken from the link target), thumbnail,
        title and duration. A duration shown as 'N/A' becomes a zero
        timedelta.

        :raises BrokenPageError: if a duration is neither 'N/A' nor
                                 colon-separated.
        """
        for thumb_span in self.parser.select(self.document.getroot(),
                                             'span#miniatura'):
            href = self.parser.select(thumb_span, 'a', 1).attrib['href']
            video = YoujizzVideo(re.sub(r'/videos/(.+)\.html', r'\1', href))

            image = thumb_span.find('.//img')
            video.thumbnail = Thumbnail(unicode(image.attrib['src']))

            video.title = to_unicode(
                self.parser.select(thumb_span, 'span#title1', 1).text.strip())

            # The site sometimes writes ';' where ':' is expected.
            raw_time = self.parser.select(thumb_span, 'span.thumbtime span',
                                          1).text.strip().replace(';', ':')
            if raw_time == 'N/A':
                minutes = seconds = 0
            elif ':' not in raw_time:
                raise BrokenPageError(
                    'Unable to parse the video duration: %s' % raw_time)
            else:
                minutes, seconds = [int(part) for part in raw_time.split(':')]

            video.duration = datetime.timedelta(minutes=minutes,
                                                seconds=seconds)

            yield video
Example #8
0
    def get_accounts_list(self):
        """Open the accounts page and return the accounts it lists.

        Several task identifiers are tried in order until one does not lead
        to an error page. When the page only shows a short list, its first
        form is submitted to request the complete one. ``self.token`` is
        refreshed from the final page.

        :returns: the value of ``self.page.get_list()``.
        :raises BrokenPageError: if every task leads to an error page.
        """
        tasks = ('mesComptes', 'mesComptesPRO', 'maSyntheseGratuite',
                 'accueilSynthese')
        for task in tasks:
            self.location(
                self.buildurl('/cyber/internet/StartTask.do',
                              taskInfoOID=task,
                              token=self.token))
            if not self.page.is_error():
                break
        else:
            raise BrokenPageError('Unable to go on the accounts list page')

        if self.page.is_short_list():
            self.select_form(nr=0)
            self.set_all_readonly(False)
            self['dialogActionPerformed'] = 'EQUIPEMENT_COMPLET'
            self.submit()

        self.token = self.page.get_token()

        return self.page.get_list()
Example #9
0
    def get_history(self):
        """Yield a Transaction for each row of the operations table.

        The table is looked up under three possible identifiers. When none
        is present, a ``td.no-result`` cell means there is nothing to yield.
        Rows with fewer than four cells are skipped.

        :raises BrokenPageError: if neither a table nor the no-result cell
                                 can be found.
        """
        tables = self.document.xpath('//table[@id="table-detail-operation"]')
        if not tables:
            tables = self.document.xpath('//table[@id="table-detail"]')
        if not tables:
            tables = self.document.getroot().cssselect('table.table-detail')
        if not tables:
            try:
                self.parser.select(self.document.getroot(), 'td.no-result', 1)
            except BrokenPageError:
                raise BrokenPageError('Unable to find table?')
            return

        columns = (self.COL_DATE, self.COL_TEXT, self.COL_DEBIT,
                   self.COL_CREDIT)
        for row in tables[0].xpath('.//tr'):
            cells = row.findall('td')
            if len(cells) < 4:
                continue

            t = Transaction(0)
            # Text of each cell, with every fragment stripped and glued.
            date, raw, debit, credit = [
                u''.join(chunk.strip() for chunk in cells[col].itertext())
                for col in columns
            ]

            t.parse(date, re.sub(r'[ ]+', ' ', raw))
            t.set_amount(credit, debit)

            yield t
Example #10
0
    def get_video_url(self, format=38):
        """Return the stream URL and file extension of the best format.

        The player configuration is read from the first script containing
        ``yt.playerConfig = ``; the JSON value is expected to end at the end
        of that line (or at the end of the script).

        :param format: preferred itag; formats listed after it in
                       ``self.AVAILABLE_FORMATS`` are used as fallbacks.
        :returns: a ``(url, extension)`` tuple; the extension defaults to
                  'flv' when the format is not in ``self.FORMAT_EXTENSIONS``.
        :raises BrokenPageError: if none of the candidate formats is offered.
        """
        formats = {}
        marker = "yt.playerConfig = "
        for script in self.parser.select(self.document.getroot(), 'script'):
            text = script.text
            if not text:
                continue

            pos = text.find(marker)
            if pos < 0:
                continue

            start = pos + len(marker)
            end = text.find('\n', start)
            if end < 0:
                # The assignment is on the last line of the script: without
                # this the slice would be empty and the JSON unparsable.
                end = len(text)
            # Strip surrounding blanks first so a trailing '\r' or space
            # does not hide the final ';'.
            config = json.loads(text[start:end].strip().rstrip(';'))

            for part in config['args']['url_encoded_fmt_stream_map'].split(','):
                args = dict(parse_qsl(part))
                formats[int(args['itag'])] = args['url'] + '&signature=' + args['sig']

            break

        # choose the better format to use.
        first = self.AVAILABLE_FORMATS.index(format)
        for candidate in self.AVAILABLE_FORMATS[first:]:
            if candidate in formats:
                return (formats[candidate],
                        self.FORMAT_EXTENSIONS.get(candidate, 'flv'))

        raise BrokenPageError('Unable to find file URL')
Example #11
0
    def set_details(self, v):
        """Fill duration, author, rating and date of *v* from the page.

        Each ``ul.spaced li`` entry is identified by the text of its
        ``<label>``; the value is the text that follows the label, except for
        the rating which is read from the entry's ``<span>``.

        :param v: video object to update in place.
        :raises BrokenPageError: if the duration value cannot be matched.
        """
        for li in self.parser.select(self.document.getroot(), 'ul.spaced li'):
            label = li.find('label')
            if label is None or label.text is None:
                # Nothing identifies this entry: ignore it instead of
                # failing on a None attribute access.
                continue
            name = label.text.strip()
            # The tail is None when the label is directly followed by another
            # element (no text in between).
            value = (label.tail or '').strip()

            if name == 'Duration:':
                m = re.match(r'((\d+)hrs)?\s*((\d+)min)?\s*((\d+)sec)?', value)
                if not m:
                    raise BrokenPageError('Unable to parse datetime: %r' %
                                          value)
                hours, minutes, seconds = m.group(2, 4, 6)
                v.duration = datetime.timedelta(hours=int(hours or 0),
                                                minutes=int(minutes or 0),
                                                seconds=int(seconds or 0))
            elif name == 'Submitted:':
                # The author is in an <i> or an <a>; otherwise it is the
                # plain text after the label.
                author = li.find('i')
                if author is None:
                    author = li.find('a')
                if author is None:
                    v.author = unicode(value)
                else:
                    v.author = unicode(author.text)
            elif name == 'Rating:':
                value = li.find('span').text
                v.rating = int(value.rstrip('%'))
                v.rating_max = 100
            elif name == 'Date:':
                v.date = parse_dt(value)
Example #12
0
 def iter_station_departures(self, station_id, arrival_id=None):
     """Yield the next departures announced for a station.

     The widget answers with ``key=value`` pairs separated by ``&``. The
     ``nomgare`` key gives the station name; each key starting with
     ``ligne`` holds seven ``;``-separated fields describing a departure.

     :param station_id: identifier of the station sent to the widget.
     :param arrival_id: accepted but not used by this implementation.
     :returns: generator of dicts with the keys ``type``, ``time``,
               ``departure``, ``arrival``, ``late`` and ``late_reason``.
     :raises BrokenPageError: if a pair has no ``=`` separator.
     """
     url = u'http://widget.canaltp.fr/Prochains_departs_15122009/dev/index.php?gare=%s' % unicode(
         station_id)
     result = self.openurl(url.encode('utf-8')).read()
     departure = ''
     for line in result.split('&'):
         if '=' not in line:
             raise BrokenPageError('Unable to parse result: %s' % line)
         key, value = line.split('=', 1)
         if key == 'nomgare':
             departure = value
         elif key.startswith('ligne'):
             _type, _unknown, _time, arrival, _served, late, late_reason = value.split(
                 ';', 6)

             # The delay starts with a number of minutes. Split it into
             # hours and minutes so that delays of an hour or more do not
             # make time() fail; an empty or blank field means no delay.
             late_words = late.split()
             if late_words:
                 late_hours, late_minutes = divmod(int(late_words[0]), 60)
                 delay = time(late_hours, late_minutes)
             else:
                 delay = time()

             yield {
                 'type':
                 to_unicode(_type),
                 'time':
                 datetime.combine(date.today(),
                                  time(*[int(x)
                                         for x in _time.split(':')])),
                 'departure':
                 to_unicode(departure),
                 'arrival':
                 to_unicode(arrival).strip(),
                 'late':
                 delay,
                 'late_reason':
                 to_unicode(late_reason).replace('\n', '').strip()
             }
Example #13
0
    def get_history(self):
        """Yield the transactions listed in the page's JavaScript data.

        The rows come from the ``ListeMvts_data`` array. When that array is
        missing but the ``js_noMvts`` panel exists, the account simply has no
        transaction and nothing is yielded. Transactions without a date, and
        those for which ``self.set_coming`` returns a true value, are
        skipped.

        :raises BrokenPageError: if neither the array nor the panel is found.
        """
        txt = self.get_from_js('ListeMvts_data = new Array(', ');')

        if txt is None:
            if self.get_from_js('js_noMvts = new Ext.Panel(', ')') is None:
                raise BrokenPageError(
                    'Unable to find transactions list in scripts')
            # there is no transactions for this account, this is normal.
            return

        # Turn the JavaScript array body into JSON: escape the existing
        # double quotes, then use double quotes as string delimiters.
        payload = '[%s]' % txt.replace('"', '\\"').replace("'", '"')

        for row in json.loads(payload):
            t = Transaction(row[self.COL_ID])

            if self.is_coming is None:
                date = self.parser.strip(row[self.COL_DATE])
            else:
                t.type = t.TYPE_CARD
                date = self.parser.strip(row[self.COL_DEBIT_DATE])
            label = self.parser.strip(row[self.COL_LABEL])

            t.parse(date, label)
            t.set_amount(row[self.COL_VALUE])

            if t.date is NotAvailable or self.set_coming(t):
                continue

            yield t
Example #14
0
 def get_date_and_duration(self):
     """Return what ``parse_date_and_duration`` extracts from the page.

     The text is taken from the first ``div.bloc-produit-haut p.date``
     element.

     :raises BrokenPageError: if that element is missing or has no text.
     """
     matches = self.document.getroot().cssselect(
         'div.bloc-produit-haut p.date')
     # cssselect() returns a list: indexing it blindly would raise an
     # IndexError and the intended BrokenPageError could never be reached.
     if not matches or matches[0].text is None:
         raise BrokenPageError('Unable to find date and duration element')
     return self.parse_date_and_duration(matches[0].text.strip())
Example #15
0
    def get_history(self, date_guesser):
        """Yield the card transactions of the second ``ca-table`` table.

        The first row is skipped. A row is treated as a balance line when
        one of its cells has the class ``cel-texte cel-neg``; its date is
        then read from the label and its amount is negated. Other rows carry
        a ``day/month`` date completed by *date_guesser*.

        :param date_guesser: object whose ``guess_date(day, month)`` returns
                             the full date of a transaction.
        :raises BrokenPageError: if the label of a balance line contains no
                                 date.
        """
        seen = set()
        lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr')
        for line in lines[1:]:  # first line is balance
            is_balance = line.xpath('./td/@class="cel-texte cel-neg"')

            # Rows may have three or four columns: the date and label come
            # first, the amount is always last.
            cols = [self.parser.tocleanstring(td) for td in line.xpath('./td')]
            date = cols[0]
            label = cols[1]
            amount = cols[-1]

            t = Transaction(0)
            t.set_amount(amount)
            t.label = t.raw = label

            if is_balance:
                m = re.search(r'(\d+ [^ ]+ \d+)', label)
                if not m:
                    raise BrokenPageError(
                        'Unable to read card balance in history: %r' % label)

                t.date = parse_french_date(m.group(1))
                t.amount = -t.amount
            else:
                day, month = map(int, date.split('/', 1))
                t.date = date_guesser.guess_date(day, month)

            t.type = t.TYPE_CARD
            t.rdate = t.date
            t.id = t.unique_id(seen)

            yield t
Example #16
0
 def get_messages_link(self):
     """
     Return the target of the first link whose href contains 'MessagesRecus'.

     The link is searched among the anchors of ``div#pantalon div.interieur``;
     its address seems to embed an identifier.

     :raises BrokenPageError: if no such link exists.
     """
     anchors = self.parser.select(self.document.getroot(), 'div#pantalon div.interieur a')
     for href in (anchor.attrib.get('href', '') for anchor in anchors):
         if 'MessagesRecus' in href:
             return href
     raise BrokenPageError('Unable to find the link to the messages page')
Example #17
0
 def login3(self, passwd):
     """Submit the confirmation code through the 'Main' form.

     The form action is taken from the JavaScript call found in the href of
     the link titled "Valider".

     :param passwd: value stored in the ``codconf`` field.
     :raises BrokenPageError: if the link is missing or its href does not
                              match the expected JavaScript call.
     """
     self.browser.select_form(name='Main')
     self.browser['codconf'] = passwd

     links = self.document.xpath('//a[@title="Valider"]')
     m = None
     if links:
         m = re.match(r"javascript:RedirectToDeiPart\('([^']+)'\);",
                      links[0].attrib.get('href', ''))
     if not m:
         # Also covers a missing link, which used to raise an IndexError.
         raise BrokenPageError('Unable to find validate URL')

     self.browser.form.action = m.group(1)
     self.browser.submit(nologin=True)
Example #18
0
 def iter_videos(self):
     """Yield the videos built from each entry of the document's 'data'.

     Entries for which ``GDCVaultVideo.get_video_from_json`` returns None
     are skipped.

     :raises BrokenPageError: if the document or its 'data' value is None.
     """
     document = self.document
     if document is None or document['data'] is None:
         raise BrokenPageError('Unable to find JSON data')
     for entry in self.document['data']:
         video = GDCVaultVideo.get_video_from_json(entry)
         # TODO: split type 4 videos into id and id#slides
         if video is not None:
             yield video
Example #19
0
    def iter_videos(self):
        """Yield a DailymotionVideo for each ``div.dmpi_video_item``.

        Items without a ``data-id`` attribute are skipped with a warning.
        Title, author, description, duration, thumbnail and rating are
        filled; the remaining fields except ``url`` are set to NotAvailable.
        An item without a duration element gets a NotAvailable duration.

        :raises BrokenPageError: if a duration has more than three
                                 colon-separated parts.
        """
        select = self.parser.select
        for item in select(self.document.getroot(), 'div.dmpi_video_item'):
            video_id = item.attrib.get('data-id', None)
            if video_id is None:
                self.browser.logger.warning('Unable to find the ID of a video')
                continue

            video = DailymotionVideo(video_id)
            video.title = unicode(select(item, 'h3 a', 1).text).strip()

            login = select(item, 'div.dmpi_user_login', 1)
            video.author = unicode(
                login.find('a').find('span').text).strip()

            description = select(item, 'div.dmpi_video_description', 1)
            video.description = html2text(
                self.parser.tostring(description)).strip() or unicode()

            try:
                parts = select(item, 'div.duration', 1).text.split(':')
            except BrokenPageError:
                # it's probably a live, np.
                video.duration = NotAvailable
            else:
                if len(parts) > 3:
                    raise BrokenPageError(
                        'Unable to parse duration %r' %
                        select(item, 'div.duration', 1).text)
                # Left-pad the missing units so that the list always reads
                # hours, minutes, seconds.
                hours, minutes, seconds = [0] * (3 - len(parts)) + parts
                video.duration = datetime.timedelta(hours=int(hours),
                                                    minutes=int(minutes),
                                                    seconds=int(seconds))

            thumb_url = unicode(
                select(item, 'img.dmco_image', 1).attrib['data-src'])
            # remove the useless anti-caching
            thumb_url = re.sub(r'\?\d+', '', thumb_url)
            # use the bigger thumbnail
            thumb_url = thumb_url.replace('jpeg_preview_medium.jpg',
                                          'jpeg_preview_large.jpg')
            video.thumbnail = Thumbnail(unicode(thumb_url))

            stars = select(item, 'div.small_stars', 1)
            video.rating_max = self.get_rate(stars)
            video.rating = self.get_rate(stars.find('div'))

            video.set_empty_fields(NotAvailable, ('url', ))
            yield video
Example #20
0
    def get_video(self, video=None):
        """Populate a video object from the page and its embed counterpart.

        The title, author and description come from ``div#content``. The
        stream URL is read from the ``info`` JSON object of the embed page,
        using the first non-empty stream key, best quality first.

        :param video: existing video to fill; when None, a new
                      DailymotionVideo is created from the ``id`` URL group.
        :returns: the filled video (unset fields become NotAvailable).
        :raises BrokenPageError: if the ``info`` object or a stream URL
                                 cannot be found.
        """
        if video is None:
            video = DailymotionVideo(self.group_dict['id'])

        content = self.parser.select(self.document.getroot(), 'div#content', 1)

        title_el = self.parser.select(content, 'span.title', 1)
        video.title = unicode(title_el.text).strip()

        author_el = self.parser.select(content,
                                       'a.name, span.name, a[rel=author]', 1)
        video.author = unicode(author_el.text).strip()

        try:
            description_el = self.parser.select(content,
                                                'div#video_description', 1)
            video.description = html2text(
                self.parser.tostring(description_el)).strip() or unicode()
        except BrokenPageError:
            video.description = u''

        embed_page = self.browser.readurl(
            'http://www.dailymotion.com/embed/video/%s' % video.id)

        found = re.search('var info = ({.*?}),[^{"]', embed_page)
        if not found:
            raise BrokenPageError('Unable to find information about video')
        info = json.loads(found.group(1))

        # Stream keys, from the best quality to the worst.
        stream_keys = ('stream_h264_hd1080_url', 'stream_h264_hd_url',
                       'stream_h264_hq_url', 'stream_h264_url',
                       'stream_h264_ld_url')
        best = next((key for key in stream_keys if info.get(key)), None)
        if best is None:
            raise BrokenPageError(u'Unable to extract video URL')
        video.url = info[best]

        video.set_empty_fields(NotAvailable)

        return video
Example #21
0
 def parse_date_and_duration(self, text):
     """Split a '[prefix - ]DD/MM/YYYY - [Nh][Nmin]Ns' text.

     :param text: text to parse.
     :returns: a ``(date, duration)`` tuple made of a datetime and a
               timedelta.
     :raises BrokenPageError: if the text does not match that layout.
     """
     found = re.match('(.* - )?(.+) - ((.+)h)?((.+)min)?(.+)s', text)
     if found is None:
         raise BrokenPageError('Unable to parse date and duration')

     day, month, year = [int(part) for part in found.group(2).split('/')]
     hours, minutes, seconds = found.group(4, 6, 7)
     # Hours and minutes are optional in the text.
     duration = datetime.timedelta(
         hours=0 if hours is None else int(hours),
         minutes=0 if minutes is None else int(minutes),
         seconds=int(seconds))
     return datetime.datetime(year, month, day), duration
Example #22
0
    def get_url(self):
        """Return the URL and extension of the first downloadable file.

        The extension is the lowercased word that starts the link text when
        it reads ``<word> - ...``; otherwise it is 'flv'.

        :returns: a ``(url, extension)`` tuple of unicode strings.
        :raises BrokenPageError: if the download list has no entry.
        """
        entries = self.parser.select(self.document.getroot(),
                                     'ul.downloadList li')
        if not entries:
            raise BrokenPageError('Unable to find file URL')

        link = self.parser.select(entries[0], 'a', 1)
        found = re.match(r'^(\w+) - .*', link.text)
        ext = found.group(1).lower() if found else u'flv'
        return unicode(link.attrib['href']), unicode(ext)
Example #23
0
    def iter_videos(self):
        """Yield a CappedVideo for each ``div.vidBackdrop`` of the page.

        Nothing is yielded when the search box contains 'No Results Found',
        because the website then displays random results.

        :raises BrokenPageError: if a duration has more than three
                                 colon-separated parts.
        """
        root = self.document.getroot()

        # When no results are found, the website returns random results
        searchbox = self.parser.select(root,
                                       'div.search form input.searchbox', 1)
        if searchbox.value == 'No Results Found':
            return

        # Extract the metadata of every result.
        for box in self.parser.select(root, 'div.vidBackdrop    '):
            # The id is the link target without its first two characters.
            _id = self.parser.select(box, 'a', 1).attrib['href'][2:]

            video = CappedVideo(_id)
            video.set_empty_fields(NotAvailable, ('url', ))

            title_link = self.parser.select(box, 'div.vidTitle a', 1)
            video.title = to_unicode(title_link.text)
            author_link = self.parser.select(box, 'div.vidAuthor a', 1)
            video.author = to_unicode(author_link.text)

            video.thumbnail = Thumbnail(
                'http://cdn.capped.tv/pre/%s.png' % _id)

            # The duration is the first space-delimited word found after the
            # first seven characters of the info block.
            info = self.parser.select(box, 'div.vidInfo', 1)
            parts = info.text[7:].split(' ')[0].split(':')
            if len(parts) > 3:
                raise BrokenPageError('Unable to parse duration %r' % info)
            # Left-pad the missing units: hours, minutes, seconds.
            hours, minutes, seconds = [0] * (3 - len(parts)) + parts

            video.duration = datetime.timedelta(hours=int(hours),
                                                minutes=int(minutes),
                                                seconds=int(seconds))

            yield video
Example #24
0
    def get_list(self):
        """Return an iterator over the accounts listed in the page.

        Only rows carrying the ``LGNTableRow`` class are read; each cell is
        identified by its ``headers`` attribute. Rows without a label or
        without a usable balance are ignored. A row whose link contains
        'CARTE_' is not returned on its own: its link is recorded on the
        first account found and its balance is added to that account's
        ``coming``.

        :raises BrokenPageError: if a balance cannot be converted to a
                                 Decimal.
        """
        accounts = []
        for tr in self.document.getiterator('tr'):
            if 'LGNTableRow' not in tr.attrib.get('class', '').split():
                continue

            account = Account()
            for td in tr.getiterator('td'):
                header = td.attrib.get('headers', '')

                if header == 'TypeCompte':
                    a = td.find('a')
                    if a is None:
                        break
                    account.label = unicode(a.find("span").text)
                    account._link_id = a.get('href', '')

                elif header == 'NumeroCompte':
                    # Drop the non-breaking spaces of the number.
                    account.id = td.text.replace(u'\xa0', '')

                elif header == 'Solde':
                    div = td.xpath('./div[@class="Solde"]')
                    if not div:
                        continue
                    balance = self.parser.tocleanstring(div[0])
                    if len(balance) == 0 or balance in ('ANNULEE', 'OPPOSITION'):
                        account.balance = NotAvailable
                        continue
                    try:
                        account.balance = Decimal(FrenchTransaction.clean_amount(balance))
                    except InvalidOperation:
                        raise BrokenPageError('Unable to parse balance %r' % balance)
                    account.currency = account.get_currency(balance)

            if not account.label or empty(account.balance):
                continue

            if 'CARTE_' not in account._link_id:
                account._card_links = []
                accounts.append(account)
                continue

            # Card line: merge it into the first account of the list.
            ac = accounts[0]
            ac._card_links.append(account._link_id)
            if not ac.coming:
                ac.coming = Decimal('0.0')
            ac.coming += account.balance
        return iter(accounts)
Example #25
0
 def recap(self):
     """Build a Transfer from the confirmation page.

     The amount, origin, recipient and reason are read from the elements
     whose ids start with ``confirm`` inside the validation block.

     :returns: the filled Transfer.
     :raises BrokenPageError: if the success alert is not displayed.
     """
     if not self.document.xpath('//p[@class="alert alert-success"]'):
         raise BrokenPageError('Unable to find confirmation')

     box = self.document.find(
             '//div[@class="encadre transfert-validation"]')

     def first(query):
         # First element matching the query inside the validation block.
         return box.xpath(query)[0]

     transfer = Transfer(0)
     transfer.amount = Decimal(FrenchTransaction.clean_amount(
         first('.//label[@id="confirmtransferAmount"]').text))
     transfer.origin = first('.//span[@id="confirmfromAccount"]').text
     transfer.recipient = first('.//span[@id="confirmtoAccount"]').text
     transfer.reason = unicode(
         first('.//span[@id="confirmtransferMotive"]').text)
     return transfer
Example #26
0
 def fill_paste(self, paste):
     """Fill the title, contents and visibility of *paste* from the page.

     The visibility is deduced from the title of the image found in the
     first line of the paste box: 'Public...' makes the paste public,
     'Unlisted...' or 'Private...' makes it non-public.

     :param paste: paste object to update in place.
     :returns: the same paste object.
     :raises BrokenPageError: if the image title starts with none of the
                              known visibilities.
     """
     select = self.parser.select
     root = self.document.getroot()

     header = select(root,
             'id("content_left")//div[@class="paste_box_info"]', 1, 'xpath')
     # NOTE(review): these queries start with '//', which in XPath searches
     # from the document root rather than from `header` -- confirm whether
     # './/' was intended.
     title_el = select(header,
             '//div[@class="paste_box_line1"]//h1', 1, 'xpath')
     paste.title = unicode(title_el.text)

     contents_el = select(root, '//textarea[@id="paste_code"]', 1, 'xpath')
     paste.contents = unicode(contents_el.text)

     icon = select(header,
             '//div[@class="paste_box_line1"]//img', 1, 'xpath')
     visibility = icon.attrib['title']
     if visibility.startswith('Public'):
         paste.public = True
     elif visibility.startswith(('Unlisted', 'Private')):
         paste.public = False
     else:
         raise BrokenPageError('Unable to get the paste visibility')
     return paste
Example #27
0
    def get_history(self, date_guesser):
        """Yield the card transactions of the second ``ca-table`` table.

        A row is treated as a balance line when one of its cells has the
        class ``cel-texte cel-neg``; the date found in its label becomes the
        debit date of the rows that follow. The very first row, when it is a
        balance line, only provides that date and is not yielded; later
        balance lines are yielded with a negated amount.

        :param date_guesser: object whose ``guess_date(day, month)`` returns
                             the full date of a transaction.
        :raises BrokenPageError: if the label of a balance line contains no
                                 date.
        """
        seen = set()
        lines = self.document.xpath('(//table[@class="ca-table"])[2]/tr')
        debit_date = None
        for i, line in enumerate(lines):
            is_balance = line.xpath('./td/@class="cel-texte cel-neg"')

            # It is possible to have three or four columns.
            cols = [self.parser.tocleanstring(td) for td in line.xpath('./td')]
            date = cols[0]
            label = cols[1]
            amount = cols[-1]

            t = Transaction(i)
            t.set_amount(amount)
            t.label = t.raw = label

            if is_balance:
                m = re.search(r'(\d+ [^ ]+ \d+)', label)
                if not m:
                    raise BrokenPageError(
                        'Unable to read card balance in history: %r' % label)

                debit_date = parse_french_date(m.group(1))

                # Skip the first line because it is balance
                if i == 0:
                    continue

                t.date = t.rdate = debit_date

                # Consider the second one as a positive amount to reset balance to 0.
                t.amount = -t.amount
            else:
                day, month = map(int, date.split('/', 1))
                t.rdate = date_guesser.guess_date(day, month)
                t.date = debit_date

            t.type = t.TYPE_CARD
            t.id = t.unique_id(seen)

            yield t
Example #28
0
    def iter_videos(self):
        """Yield one ``ArteVideo`` per ``div[class=video]`` of the page.

        For each video the id is taken from the title link href (empty
        string when the href does not match the expected pattern), the
        rating is the number of ``rateContainer`` child divs whose class
        contains ``star-rating-on``, and ``rating_max`` is the total number
        of those child divs. The duration is set from a ``mm:ss`` or
        ``hh:mm:ss`` text and left untouched when a ``BrokenPageError``
        occurs while reading it.
        """
        for div in self.document.getroot().cssselect("div[class=video]"):
            link = div.find('h2').find('a')
            title = link.text

            match = re.match(r'/(fr|de|en)/videos/(.*)\.html',
                             link.attrib['href'])
            _id = match.group(2) if match else ''

            rate_box = self.parser.select(div, 'div[class=rateContainer]', 1)
            stars = rate_box.findall('div')
            rating = sum(1 for star in stars
                         if 'star-rating-on' in star.attrib['class'])
            rating_max = len(stars)

            video = ArteVideo(_id)
            video.title = unicode(title)
            video.rating = rating
            video.rating_max = rating_max

            thumb = self.parser.select(div, 'img[class=thumbnail]', 1)
            video.thumbnail = Thumbnail(
                u'http://videos.arte.tv' + thumb.attrib['src'])

            try:
                parts = self.parser.select(
                    div, 'div.duration_thumbnail', 1).text.split(':')
                if len(parts) not in (2, 3):
                    raise BrokenPageError('Unable to parse duration %r' %
                                          parts)
            except BrokenPageError:
                # Duration is optional: leave the field as it is.
                pass
            else:
                # Left-pad "mm:ss" with zero hours so both layouts unpack
                # the same way.
                hours, minutes, seconds = [0] * (3 - len(parts)) + parts
                video.duration = datetime.timedelta(hours=int(hours),
                                                    minutes=int(minutes),
                                                    seconds=int(seconds))

            # Presumably marks the remaining unset fields (except 'url') as
            # NotAvailable -- verify against set_empty_fields.
            video.set_empty_fields(NotAvailable, ('url', ))

            yield video
Example #29
0
    def get_list(self):
        """Return an iterator over the accounts declared in the page scripts.

        The accounts come from the ``_data = new Array(...)`` JavaScript
        literal, which is turned into JSON by swapping single quotes for
        double quotes. Entries whose id contains ``_CarteVisa`` are not
        returned as accounts: their ``_args`` are appended to the previous
        account's ``_card_ids`` and their balance is added to that
        account's ``coming``.

        :raises BrokenPageError: if the array cannot be found in the scripts
        """
        raw = self.get_from_js('_data = new Array(', ');', is_list=True)
        if raw is None:
            raise BrokenPageError('Unable to find accounts list in scripts')

        rows = json.loads('[%s]' % raw.replace("'", '"'))

        accounts = []
        for row in rows:
            account = Account()
            account.id = row[self.COL_ID].replace(' ', '')

            # The label cell holds an HTML fragment: parse it and keep the
            # cleaned text of its "libelleCompteTDB" div.
            label_html = unicode(row[self.COL_LABEL]).encode(
                self.browser.ENCODING)
            label_doc = self.parser.parse(StringIO(label_html),
                                          self.browser.ENCODING)
            label_div = label_doc.xpath('//div[@class="libelleCompteTDB"]')[0]
            account.label = self.parser.tocleanstring(label_div)

            account.balance = Decimal(
                FrenchTransaction.clean_amount(row[self.COL_BALANCE]))
            account._link = self.get_history_link()

            if row[self.COL_HISTORY] == 'true':
                history_args = {
                    '_eventId': 'clicDetailCompte',
                    '_ipc_eventValue': '',
                    '_ipc_fireEvent': '',
                    'deviseAffichee': 'DEVISE',
                    'execution': self.get_execution(),
                    'idCompteClique': row[self.COL_ID],
                }
            else:
                history_args = None
            account._args = history_args

            if '_CarteVisa' in account.id:
                # Card entry: attach it to the account listed just before.
                parent = accounts[-1]
                parent._card_ids.append(account._args)
                if not parent.coming:
                    parent.coming = Decimal('0.0')
                parent.coming += account.balance
                continue

            account._card_ids = []
            accounts.append(account)

        return iter(accounts)
Example #30
0
    def confirm(self, password):
        """Fill and submit the transfer confirmation form.

        Only some characters of ``password`` are sent: those whose position
        matches a ``<font>`` element with class ``vide`` inside the
        ``digitpadtransfer`` span. They are translated by the virtual
        keyboard into a code, which is submitted through the first form
        whose name starts with ``j_id`` and which has no ``enctype``
        attribute.

        If the virtual keyboard cannot be built, the error is reported via
        ``error()`` and the method returns ``None`` without submitting.

        :param password: the user's password, indexed by character position
        :raises BrokenPageError: if no matching confirmation form is found
        """
        try:
            vk = INGVirtKeyboard(self)
        except VirtKeyboardError as err:
            error("Error: %s" % err)
            return

        span = self.document.find('//span[@id="digitpadtransfer"]')
        realpasswd = "".join(
            password[i]
            for i, font in enumerate(span.getiterator('font'))
            if font.attrib.get('class') == "vide")

        confirmform = None
        for form in self.document.xpath('//form'):
            # Forms without a name are simply not candidates; an explicit
            # default replaces the former bare ``except:`` which swallowed
            # every exception.
            name = form.attrib.get('name', '')
            if name.startswith("j_id") and 'enctype' not in form.attrib:
                confirmform = form
                break
        if confirmform is None:
            raise BrokenPageError('Unable to find confirm form')

        formname = confirmform.attrib['name']
        # NOTE(review): this writes part of the user's password to the debug
        # log -- consider removing or masking it.
        self.browser.logger.debug('We are looking for : ' + realpasswd)
        self.browser.select_form(formname)
        self.browser.set_all_readonly(False)
        # Iterate over a copy because controls are removed along the way.
        for a in self.browser.controls[:]:
            description = str(a)
            if "_link_hidden_" in description or "j_idcl" in description:
                self.browser.controls.remove(a)
        coordinates = vk.get_string_code(realpasswd)
        self.browser.logger.debug("Coordonates: " + coordinates)

        self.browser.controls.append(
            ClientForm.TextControl('text', 'AJAXREQUEST',
                                   {'value': '_viewRoot'}))
        self.browser.controls.append(
            ClientForm.TextControl('text', '%s:mrgtransfer' % formname,
                                   {'value': '%s:mrgtransfer' % formname}))
        self.browser['%s:mrltransfer' % formname] = coordinates
        self.browser.submit(nologin=True)