コード例 #1
0
        class item(ItemElement):
            """Declarative item producing a ``Detail`` object from table cells."""

            klass = Detail

            # No identifier is extracted for these objects.
            obj_id = None
            # Date text is selected with an XPath union of the first and
            # second cells.
            obj_datetime = DateTime(CleanText('td[1] | td[2]'))
            # Price is read from the seventh cell; 0 is passed as the filter
            # default.
            obj_price = CleanDecimal('td[7]', replace_dots=False, default=0)
            # Currency is a constant, not scraped.
            obj_currency = u'EUR'
            # Label is assembled from cells 3 to 6.
            obj_label = Format(u"%s from %s to %s - %s", CleanText('td[3]'),
                               CleanText('td[4]'), CleanText('td[5]'),
                               CleanText('td[6]'))
コード例 #2
0
        def obj_date(self):
            """Return the upload date parsed from the ``upload_by`` block.

            The text following "- Mise en ligne le" is extracted, run through
            every (pattern, replacement) pair of ``DATE_TRANSLATE_FR`` and
            finally parsed with ``DateTime`` using a ``LinearDateGuesser``.
            """
            upload_text = CleanText('//div[@class="upload_by"]',
                                    replace=[(u'à', '')])
            extractor = Regexp(upload_text, '.*- Mise en ligne le (.*).')
            translated = extractor(self)

            for french, english in DATE_TRANSLATE_FR:
                translated = french.sub(english, translated)

            # DateTime takes its input from a filter, so the translated text is
            # passed through the environment.
            self.env['tmp'] = translated
            parser = DateTime(Env('tmp'), LinearDateGuesser())
            return parser(self)
コード例 #3
0
ファイル: pages.py プロジェクト: guix77/weboob
 def obj_end_date(self):
     """Parse the end date from the element's ``title`` attribute.

     Returns None when the title contains no dated fragment. With exactly
     one dated fragment, the date is combined with the second "HHhMM" time
     of that fragment; with several fragments, the last match (``nth=-1``)
     supplies both date and time.
     """
     found = re.findall(r'\w* \w* \d?\d \w* \d{4} \w* \d{2}h\d{2}',
                        CleanText('./@title')(self), re.UNICODE)
     if not found:
         return None

     if len(found) == 1:
         date_and_time = Regexp(
             CleanText('./@title'),
             r'\w* \w* (\d?\d \w* \d{4}) \w* \d{2}h\d{2} \w* (\d{2}h\d{2})',
             '\\1 \\2',
             flags=re.UNICODE)
     else:
         date_and_time = Regexp(
             CleanText('./@title'),
             r'\w* \w* (\d?\d \w* \d{4}) \w* (\d{2}h\d{2})',
             '\\1 \\2',
             nth=-1,
             flags=re.UNICODE)

     return DateTime(date_and_time, parse_func=parse_french_date)(self)
コード例 #4
0
ファイル: pages.py プロジェクト: guix77/weboob
 def obj_start_date(self):
     """Parse the start date and time from the element's ``title`` attribute.

     Returns None (implicitly) when the title contains no dated fragment.
     Otherwise the first date and the first "HHhMM" time are combined and
     handed to ``parse_french_date``.
     """
     m = re.findall(r'\w* \w* \d?\d \w* \d{4} \w* \d{2}h\d{2}',
                    CleanText('./@title')(self), re.UNICODE)
     if m:
         # Raw string: "\w" and "\d" are not valid escapes in a plain string
         # literal and trigger warnings on recent Python versions.
         return DateTime(Regexp(
             CleanText('./@title'),
             r'\w* \w* (\d?\d \w* \d{4}) \w* (\d{2}h\d{2}).*',
             '\\1 \\2',
             flags=re.UNICODE),
                         parse_func=parse_french_date)(self)
コード例 #5
0
        def obj_date(self):
            """Return the upload date parsed from the ``line`` paragraph.

            The text following "Mise en ligne le" is extracted, run through
            every (pattern, replacement) pair of ``DATE_TRANSLATE_FR`` and
            parsed with ``DateTime`` using a ``LinearDateGuesser``.
            """
            line_text = CleanText('//p[has-class("line")]',
                                  replace=[(u'à', '')])
            translated = Regexp(line_text, '.*Mise en ligne le (.*)')(self)

            for french, english in DATE_TRANSLATE_FR:
                translated = french.sub(english, translated)

            # DateTime takes its input from a filter, so the translated text is
            # passed through the environment.
            self.env['tmp'] = translated
            parser = DateTime(Env('tmp'), LinearDateGuesser())
            return parser(self)
コード例 #6
0
            def obj_date(self):
                """Return the listing date, resolving relative day names.

                "Aujourd'hui" and "Hier" are replaced by today's and
                yesterday's date before the text is translated with
                ``DATE_TRANSLATE_FR`` and parsed.
                """
                today_text = str(date.today())
                yesterday_text = str((date.today() - timedelta(1)))
                relative_days = [('Aujourd\'hui', today_text),
                                 ('Hier', yesterday_text)]

                translated = CleanText('./div[@class="lbc"]/div[@class="date"]',
                                       replace=relative_days)(self)
                for french, english in DATE_TRANSLATE_FR:
                    translated = french.sub(english, translated)

                # DateTime takes its input from a filter, so the text is passed
                # through the environment.
                self.env['tmp'] = translated
                parser = DateTime(Env('tmp'), LinearDateGuesser())
                return parser(self)
コード例 #7
0
ファイル: pages.py プロジェクト: sourcery-ai-bot/weboob
        class item(ItemElement):
            """Declarative item producing a ``BaseCalendarEvent``."""

            klass = BaseCalendarEvent

            # URL and id both come from the link inside the "bbox" heading; the
            # id is whatever follows "aspx?" in that link.
            obj_url = Link('./div[@class="bbox"]/h1/a')
            obj_id = Regexp(Link('./div[@class="bbox"]/h1/a'), r'aspx\?(.+)')
            obj_location = CleanText('./div[@class="bbox"]/span/a')
            # Start date is read from the ``datetime`` attribute of the <time>
            # element.
            obj_start_date = DateTime(Attr('.//time', 'datetime'))
            # Summary is the part of the link title following "details of ".
            obj_summary = Regexp(Attr('./div[@class="bbox"]/h1/a', 'title'),
                                 r'details of (.+)')
            # Category and status are constants, not scraped.
            obj_category = CATEGORIES.CONCERT
            obj_status = STATUS.CONFIRMED
コード例 #8
0
            def obj_sensors(self):
                """Build the "Level" and "Flow" sensors for this item.

                Both sensors share the same last-measure date, forecast and
                alarm values; only the name, unit and environment key of the
                measured value differ.
                """
                # Reorders "DD.MM.YYYY HH:MM" into "YYYY-MM-DD HH:MM" before
                # parsing.
                reordered = Regexp(Env('datetime'),
                                   r'(\d+)\.(\d+)\.(\d+) (\d+):(\d+)',
                                   r'\3-\2-\1 \4:\5',
                                   default=NotAvailable)
                lastdate = DateTime(reordered, default=NotAvailable)(self)
                forecast = Map(Env('forecast'), self.forecasts,
                               default=NotAvailable)(self)
                alarm = Map(Env('alarm'), self.alarmlevel, default=u'')(self)

                sensors = []
                definitions = (
                    (u"Level", u"cm", 'levelvalue'),
                    (u"Flow", u"m3/s", 'flowvalue'),
                )
                for name, unit, env_key in definitions:
                    self.add_sensor(sensors, name, unit, self.env[env_key],
                                    forecast, alarm, lastdate)

                return sensors
コード例 #9
0
ファイル: pages.py プロジェクト: dkremer-ledger/weboob
                    class obj_lastvalue(ItemElement):
                        """Declarative item producing a ``GaugeMeasure``."""

                        klass = GaugeMeasure

                        # The date is built by joining two environment values
                        # with a space before parsing.
                        obj_date = DateTime(
                            Format(
                                '%s %s',
                                Env('min_donnees'),
                                Env('date'),  # "date" contains the time...
                            )
                        )
                        # Level is read from the 'value' key of the item.
                        obj_level = CleanDecimal(Dict('value'))
コード例 #10
0
    class get_video(ItemElement):
        """Declarative item producing a ``BaseVideo`` from page metadata."""

        klass = BaseVideo

        obj_id = Env('_id')
        obj_title = CleanText('//title')
        obj_author = CleanText('//meta[@name="author"]/@content')
        obj_description = CleanText('//meta[@name="description"]/@content')

        def obj_duration(self):
            """Return the duration from the ``video:duration`` meta tag.

            0 is passed as the filter default for a missing tag.
            """
            seconds = int(
                CleanText('//meta[@property="video:duration"]/@content',
                          default=0)(self))
            return timedelta(seconds=seconds)

        def obj_thumbnail(self):
            """Return a ``Thumbnail`` built from the ``og:image`` meta tag."""
            url = CleanText('//meta[@property="og:image"]/@content')(self)
            thumbnail = Thumbnail(url)
            thumbnail.url = url
            return thumbnail

        obj_date = DateTime(
            CleanText('//meta[@property="video:release_date"]/@content'))

        def obj__formats(self):
            """Return the media URLs as ``{extension: {quality: url}}``.

            The player configuration is the JSON object assigned to
            ``var config`` in the page scripts. Returns None when that
            configuration cannot be found.

            Raises:
                ParseError: when the configuration reports an error.
            """
            # Raw string: "\s" and "\(" are not valid escapes in a plain
            # string literal.
            player = Regexp(
                CleanText('//script'),
                r'.*var config = ({"context".*}}});\s*buildPlayer\(config\);.*',
                default=None)(self)
            if not player:
                return None

            info = json.loads(player)
            if info.get('error') is not None:
                raise ParseError(info['error']['title'])
            metadata = info.get('metadata')

            formats = {}
            for quality, media_list in metadata['qualities'].items():
                for media in media_list:
                    media_url = media.get('url')
                    if not media_url:
                        continue
                    # Manifest entries are skipped.
                    if media.get('type') == 'application/vnd.lumberjack.manifest':
                        continue
                    ext = determine_ext(media_url)
                    # Add to the qualities already collected for this
                    # extension; the previous code replaced the whole mapping
                    # whenever a new quality appeared, dropping earlier ones.
                    formats.setdefault(ext, {})[quality] = media_url

            return formats
コード例 #11
0
ファイル: browser.py プロジェクト: dkremer-ledger/weboob
    def _parse_transaction(self, payment):
        """Convert one payment mapping into a ``Transaction``.

        Returns None when the payment has no ``transaction_number``, which
        indicates a failed transaction.
        """
        transaction = Transaction()
        number = Dict('transaction_number', default=None)(payment)
        if number is None:
            return None

        transaction.id = number
        transaction.date = DateTime(Dict('executed_at'))(payment)
        transaction.rdate = DateTime(Dict('created_at'))(payment)

        # The type can be null for refunds; anything not listed here maps to
        # TYPE_UNKNOWN.
        known_types = {
            'ORDER': Transaction.TYPE_CARD,  # order on lunchr website
            'LUNCHR_CARD_PAYMENT': Transaction.TYPE_CARD,  # pay in shop
            'MEAL_VOUCHER_CREDIT': Transaction.TYPE_DEPOSIT,
        }
        payment_type = Dict('type')(payment)
        transaction.type = known_types.get(payment_type,
                                           Transaction.TYPE_UNKNOWN)
        transaction.label = Dict('name')(payment)
        transaction.amount = CleanDecimal(Dict('amount/value'))(payment)
        return transaction
コード例 #12
0
    class get_housing(ItemElement):
        """Declarative item producing a ``Housing`` from an XML advert."""

        klass = Housing

        obj_id = Env('_id')
        obj_type = EPAdvertType(CleanText('//rubrique'))
        obj_advert_type = ADVERT_TYPES.PERSONAL
        obj_house_type = EPHouseType(CleanText('//tbien'))
        obj_title = CleanText('//titre')
        obj_rooms = CleanDecimal('//pieces')
        obj_cost = CleanDecimal('//prix')
        obj_currency = Currency.get_currency(u'€')
        obj_utilities = UTILITIES.UNKNOWN
        obj_text = CleanText('//titre')
        obj_location = CleanText('//ville')
        obj_url = CleanText('//urlDetailAnnonce')
        obj_area = CleanDecimal('//surface')
        obj_price_per_meter = PricePerMeterFilter()
        obj_phone = CleanText('//telephone1')
        obj_date = DateTime(CleanText('//DateCheck'))

        def obj_GES(self):
            """Return the ``ENERGY_CLASS`` member named by ``//GSE``.

            Falls back to ``NotAvailable`` for an unknown name.
            """
            letter = CleanText('//GSE')(self).upper()
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_photos(self):
            """Return a ``HousingPhoto`` for every non-empty image URL."""
            image_nodes = ['//UrlImage1', '//UrlImage2', '//UrlImage3']
            urls = (CleanText(node)(self) for node in image_nodes)
            return [HousingPhoto(url) for url in urls if url]

        def obj_DPE(self):
            """Return the ``ENERGY_CLASS`` member named by ``//DPE``.

            Falls back to ``NotAvailable`` for an unknown name.
            """
            letter = CleanText('//DPE')(self).upper()
            return getattr(ENERGY_CLASS, letter, NotAvailable)

        def obj_details(self):
            """Return a ``{label: text}`` dict of the non-empty detail nodes."""
            labelled_nodes = [
                ('//Nb_Etage', 'Nombre d\'etages'),
                ('//Neuf', 'Neuf'),
                ('//Ancien_avec_du_Charme', 'Ancien avec charme'),
                ('//Avec_terasse', 'Avec Terrasse'),
                ('//latitude', 'Latitude'),
                ('//longitude', 'Longitude'),
                ('//loyer', 'Loyer'),
                ('//piscine', 'Piscine'),
                ('//surface_balcon', 'Surface du balcon'),
                ('//surface_exp', 'Surface exploitable'),
                ('//surface_terrain', 'Surface du Terrain'),
                ('//Meuble', 'furnished'),
            ]
            extracted = ((label, CleanText(node)(self))
                         for node, label in labelled_nodes)
            return {label: text for label, text in extracted if text}
コード例 #13
0
ファイル: pages.py プロジェクト: sourcery-ai-bot/weboob
 def get_roadmap(self):
     """Yield one ``RoadStep`` per "etape" row of the itinerary table."""
     rows = self.doc.xpath(
         '//table[@class="trajet_etapes"]/tr[@class="etape"]')
     for row in rows:
         roadstep = RoadStep()

         # The line combines the transport type and the text of the "moyen"
         # cell.
         transport = DepartureTypeFilter(
             row.xpath('./td[@class="moyen"]'))(self)
         moyen_text = CleanText('./td[@class="moyen"]')(row)
         roadstep.line = '%s %s' % (transport, moyen_text)

         departure_time = DateTime(
             CleanText('./th/span[@class="depart"]'),
             LinearDateGuesser())
         roadstep.start_time = departure_time(row)

         # The arrival time is in the span following the "depart" one.
         arrival_time = DateTime(
             CleanText('./th/span[@class="depart"]/following-sibling::span'),
             LinearDateGuesser())
         roadstep.end_time = arrival_time(row)

         roadstep.departure = CleanText(
             './td[@class="arret"]/p/strong')(row)
         roadstep.arrival = CleanText(
             './td[@class="arret"]/p/following-sibling::p/strong')(row)
         roadstep.duration = RoadMapDuration(
             CleanText('./td[@class="time"]'))(row)
         yield roadstep
コード例 #14
0
ファイル: pages.py プロジェクト: dermorz/weboob
    class get_last_video(ItemElement):
        """Declarative item producing a ``BaseVideo`` from "diffusion-info"."""

        klass = BaseVideo

        obj_id = CleanText('//div[@id="diffusion-info"]/@data-diffusion')
        obj_title = CleanText(
            '//div[@id="diffusion-info"]/h1/div[@id="diffusion-titre"]')
        # The "à" separator and double spaces are removed before the
        # "DD-MM-YY ... HHhMM" fragment is extracted. The pattern is a raw
        # string because "\d" is not a valid escape in a plain string literal.
        obj_date = DateTime(Regexp(
            CleanText(
                '//div[@id="diffusion-info"]/h1|//div[@id="diffusion-info"]/div/div/*[1]',
                replace=[(u'à', u''), (u'  ', u' ')]),
            r'.+(\d{2}-\d{2}-\d{2}.+\d{1,2}h\d{1,2}).+'),
                            dayfirst=True)
コード例 #15
0
        class item(ItemElement):
            """Declarative item producing a ``BaseVideo`` for replay entries."""

            klass = BaseVideo

            def condition(self):
                """Keep only elements whose "c3" cell reads "En replay"."""
                return CleanText('div[@class="autre-emission-c3"]')(self) == "En replay"

            obj_id = Regexp(Link('.'), '^/videos/.+,(.+).html$')
            obj_title = CleanText('//meta[@name="programme_titre"]/@content')
            # The "à" separator and double spaces are removed before the
            # "DD-MM ... HH:MM" fragment is extracted. The pattern is a raw
            # string because "\d" is not a valid escape in a plain string
            # literal.
            obj_date = DateTime(Regexp(CleanText('./div[@class="autre-emission-c2"]|./div[@class="autre-emission-c4"]',
                                                 replace=[(u'à', u''), (u'  ', u' ')]),
                                       r'(\d{2}-\d{2}.+\d{1,2}:\d{1,2})'),
                                dayfirst=True)
コード例 #16
0
        class item(ItemElement):
            """Declarative item producing a ``Detail`` from table cells."""

            klass = Detail

            def condition(self):
                """Skip rows whose first cell is empty or reads "Date"."""
                first_cell_text = self.el.xpath('td[1]')[0].text
                if first_cell_text is None:
                    return False
                return first_cell_text != "Date"

            # No identifier is extracted for these objects.
            obj_id = None
            obj_datetime = DateTime(CleanText('td[1]', symbols=u'à'), dayfirst=True)
            # Label is assembled from cells 2 to 4.
            obj_label = Format(u'%s %s %s', CleanText('td[2]'), CleanText('td[3]'),
                               CleanText('td[4]'))
            obj_price = CleanDecimal('td[5]', default=Decimal(0), replace_dots=True)
コード例 #17
0
ファイル: pages.py プロジェクト: linura/weboob
    class get_video(ItemElement):
        """Declarative item filling video fields from the description page."""

        obj_title = CleanText('//article[@id="description"]//h1')
        obj_description = CleanText('//article[@id="description"]//section/following-sibling::div')

        # "DD.MM.YY à HHhMM" is rewritten as "20YY/MM/DD HH:MM" before parsing.
        obj_date = DateTime(Regexp(
            CleanText('//article[@id="description"]//span[contains(text(),"diffusé le")]'),
            r'(\d{2})\.(\d{2})\.(\d{2}) à (\d{2})h(\d{2})', r'20\3/\2/\1 \4:\5'))
        # The pipe is escaped so it matches the literal "|" separator. Left
        # unescaped it is an alternation whose empty branch matches first, so
        # the minutes group was never captured.
        obj_duration = Eval(parse_duration, Regexp(CleanText('//div[span[text()="|"]]'), r'\| (\d+)min'))

        # The image source is completed with an "https:" prefix.
        obj_thumbnail = Eval(Thumbnail, Format('https:%s', Attr('//div[@id="playerPlaceholder"]//img', 'data-src')))
        obj__number = Attr('//div[@id="player"]', 'data-main-video')
        obj_license = LICENSES.COPYRIGHT
コード例 #18
0
    class get_thread(ItemElement):
        """Declarative item producing a ``Thread`` from a permalink page."""

        klass = Thread

        obj_id = Format('%s#%s', Env('user'), Env('_id'))
        # Title joins the header link text and the tweet paragraph; spaces
        # inserted after "@", "#" and "http://" are removed.
        obj_title = Format('%s \n\t %s',
                           CleanText('//div[@class="permalink-inner permalink-tweet-container"]/div/div/div/a',
                                     replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]),
                           CleanText('//div[@class="permalink-inner permalink-tweet-container"]/div/div/p',
                                     replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]))
        # The "HH:MM ... - <date> YYYY" text is reordered to "<date> HH:MM".
        # The pattern is a raw string because "\d" is not a valid escape in a
        # plain string literal.
        obj_date = DateTime(Regexp(CleanText('//div[@class="permalink-inner permalink-tweet-container"]/div/div/div[@class="client-and-actions"]/span/span'),
                                   r'(\d+:\d+).+- (.+\d{4})',
                                   '\\2 \\1'), translations=DATE_TRANSLATE_FR)
コード例 #19
0
class SeLogerItem(ItemElement):
    """Declarative item producing a ``Housing``; one child node per field."""

    klass = Housing

    obj_id = CleanText('idAnnonce')
    obj_title = CleanText('titre')
    obj_date = DateTime(CleanText('dtFraicheur'))
    obj_cost = CleanDecimal('prix')
    # Currency is kept as the raw text of the 'prixUnite' node.
    obj_currency = CleanText('prixUnite')
    obj_area = CleanDecimal('surface')
    obj_text = CleanText('descriptif')
    obj_location = CleanText('ville')
    # Only this field passes NotAvailable as a default.
    obj_station = CleanText('proximite', default=NotAvailable)
    obj_url = CleanText('permaLien')
コード例 #20
0
ファイル: pages.py プロジェクト: dermorz/weboob
            def obj_date(self):
                """Return the listing date, or ``NotAvailable`` if absent.

                "Aujourd'hui" and "Hier" are replaced by today's and
                yesterday's date before the text is translated with
                ``DATE_TRANSLATE_FR`` and parsed.
                """
                today_text = str(date.today())
                yesterday_text = str((date.today() - timedelta(1)))
                relative_days = [('Aujourd\'hui', today_text),
                                 ('Hier', yesterday_text)]

                translated = CleanText('./section[@class="item_infos"]/aside/p[@class="item_supp"]/text()',
                                       replace=relative_days)(self)
                if not translated:
                    return NotAvailable

                for french, english in DATE_TRANSLATE_FR:
                    translated = french.sub(english, translated)

                # DateTime takes its input from a filter, so the text is passed
                # through the environment.
                self.env['tmp'] = translated
                parser = DateTime(Env('tmp'), LinearDateGuesser())
                return parser(self)
コード例 #21
0
def create_video(metadata):
    """Build an ``RmllVideo`` from a metadata mapping.

    Reads the 'oid', 'title', 'creation', 'duration' and 'thumb' keys. The
    video URL is left as ``NotLoaded``.
    """
    video = RmllVideo(metadata['oid'])
    video.title = unicode(metadata['title'])

    creation = DateTime(Dict('creation'), default=NotLoaded)
    video.date = creation(metadata)

    duration = RmllDuration(Dict('duration', default=''), default=NotLoaded)
    video.duration = duration(metadata)

    # The normalized thumbnail address serves as both the thumbnail id and
    # its URL.
    thumbnail_address = NormalizeThumbnail(Dict('thumb'))(metadata)
    video.thumbnail = Thumbnail(thumbnail_address)
    video.thumbnail.url = video.thumbnail.id

    video.url = NotLoaded
    return video
コード例 #22
0
ファイル: pages.py プロジェクト: guix77/weboob
        class item(ItemElement):
            """Declarative item producing a ``BaseJobAdvert``."""

            klass = BaseJobAdvert

            # The id comes from the element's data-jobid attribute; it is the
            # only field without a NotAvailable default.
            obj_id = CleanText('./@data-jobid')
            obj_society_name = CleanText('./div/div/div[@class="company"]',
                                         default=NotAvailable)
            obj_title = CleanText('./div/div/header/h2[@class="title"]/a',
                                  default=NotAvailable)
            # Publication date is read from the <time> element's datetime
            # attribute.
            obj_publication_date = DateTime(
                CleanText('./div/div[has-class("meta")]/time/@datetime'),
                default=NotAvailable)
            obj_place = CleanText('./div/div/div[@class="location"]',
                                  default=NotAvailable)
コード例 #23
0
        class item(ItemElement):
            """Declarative item producing a ``RazibusCalendarEvent``."""

            klass = RazibusCalendarEvent

            def validate(self, obj):
                """Accept ``obj`` only if it passes the city, category and
                period filters taken from the environment."""
                env = self.env
                if not self.is_valid_event(obj, env['city'], env['categories']):
                    return False
                return self.is_event_in_valid_period(
                    obj.start_date, env['date_from'], env['date_to'])

            def is_valid_event(self, event, city, categories):
                """Return True when the event matches the optional filters.

                An empty city or empty categories disables that filter; the
                city comparison ignores case.
                """
                wrong_city = bool(city) and city.upper() != event.city.upper()
                if wrong_city:
                    return False
                wrong_category = (bool(categories)
                                  and event.category not in categories)
                return not wrong_category

            def is_event_in_valid_period(self, event_date, date_from, date_to):
                """Return True when ``event_date`` is within the period.

                ``date_to`` is optional: a falsy value leaves the period open
                ended.
                """
                if not event_date >= date_from:
                    return False
                if not date_to:
                    return True
                return bool(event_date <= date_to)

            # The id is the URL path between the site root and ".html".
            obj_id = Regexp(Link('./p/strong/a[@itemprop="url"]'),
                            'http://razibus.net/(.*).html')
            obj_summary = CleanText('./p/strong/a[@itemprop="url"]')
            obj_start_date = DateTime(
                CleanText('./p/span[@itemprop="startDate"]/@content'))
            # End date combines the start date with the result of EndTime.
            obj_end_date = CombineDate(
                DateTime(
                    CleanText('./p/span[@itemprop="startDate"]/@content')),
                EndTime('.'))
            obj_location = CleanText('./p/span[@itemprop="location"]/@content')
            obj_city = CleanText('./p/span[@itemprop="location"]')
コード例 #24
0
        def _get_coef_value(self, AM=True, jour=0):
            """Return a ``GaugeMeasure`` for the coefficient of day ``jour``.

            With ``AM`` true the first ``<b>`` entry of the row is used,
            otherwise the second one, provided the row has more than one
            entry. Returns None when no time or no value is found.
            """
            row = '//tr[@id="MareeJours_%s"]' % jour
            when, coefficient = None, None

            if AM:
                when = DateTime(CleanText(row + '/td[1]/b[1]'))(self)
                coefficient = CleanText(row + '/td[3]/b[1]')(self)
            elif len(XPath(row + '/td[1]/b')(self)) > 1:
                when = DateTime(CleanText(row + '/td[1]/b[2]'))(self)
                coefficient = CleanText(row + '/td[3]/b[2]')(self)

            if not (when and coefficient):
                return None

            measure = GaugeMeasure()
            measure.level = float(coefficient)
            # The parsed time is shifted by ``jour`` days.
            measure.date = when + timedelta(days=jour)
            return measure
コード例 #25
0
ファイル: browser.py プロジェクト: dermorz/weboob
    class fill_paste(ItemElement):
        """Declarative item filling a ``PastealaconPaste`` from its page."""

        klass = PastealaconPaste

        obj_id = Env('id')
        # Title and date are both extracted from the same heading, of the form
        # "Posted by <title> on <date> (".
        obj_title = Regexp(CleanText('id("content")/h3'),
                           r'Posted by (.+) on .+ \(')
        obj__date = DateTime(
            Regexp(CleanText('id("content")/h3'), r'Posted by .+ on (.+) \('))
        obj_contents = RawText('//textarea[@id="code"]')

        def parse(self, el):
            """Raise ``PasteNotFound`` unless exactly one syntax list exists."""
            # there is no 404, try to detect if there really is a content
            if len(el.xpath('id("content")/div[@class="syntax"]//ol')) != 1:
                raise PasteNotFound()
コード例 #26
0
class SeLogerItem(ItemElement):
    """Declarative item producing a ``Housing``; one child node per field."""

    klass = Housing

    obj_id = CleanText('idAnnonce')

    def obj_type(self):
        """Return the post type matching the 'idTypeTransaction' node.

        Returns ``NotAvailable`` when the id is not listed in ``TYPES``,
        the same way ``obj_house_type`` handles an unknown id, instead of
        letting ``StopIteration`` escape from ``next()``.
        """
        transaction_id = int(CleanText('idTypeTransaction')(self))
        post_type = next(
            (k for k, v in TYPES.items() if v == transaction_id),
            NotAvailable)
        if post_type == POSTS_TYPES.FURNISHED_RENT:
            # SeLoger does not let us discriminate between furnished and not
            # furnished.
            return POSTS_TYPES.RENT
        return post_type

    def obj_house_type(self):
        """Return the house type matching the 'idTypeBien' node.

        Returns ``NotAvailable`` when the id is not listed in ``RET``.
        """
        house_id = CleanText('idTypeBien')(self)
        return next((k for k, v in RET.items() if v == house_id),
                    NotAvailable)

    # Title looks like "<titre> <surface><unit> - <ville>".
    obj_title = Format(
        "%s %s%s - %s",
        CleanText('titre'),
        CleanText('surface'),
        CleanText('surfaceUnite'),
        CleanText('ville'),
    )
    obj_date = DateTime(CleanText('dtFraicheur'))
    obj_cost = CleanDecimal('prix')

    obj_currency = Currency('prixUnite')

    obj_area = CleanDecimal('surface', default=NotAvailable)
    obj_price_per_meter = PricePerMeterFilter()
    obj_text = CleanText('descriptif')
    # Both singular and plural node names are accepted for room counts.
    obj_rooms = CleanDecimal('nbPiece|nbPieces', default=NotAvailable)
    obj_bedrooms = CleanDecimal('nbChambre|nbChambres', default=NotAvailable)

    def obj_location(self):
        """Return the location as "<address> <city> (<postcode>)".

        The 'quartier' node replaces the address when the address is empty.
        """
        location = CleanText('adresse', default="")(self)
        quartier = CleanText('quartier', default=None)(self)
        if not location and quartier is not None:
            location = quartier
        ville = CleanText('ville')(self)
        cp = CleanText('cp')(self)
        return u'%s %s (%s)' % (location, ville, cp)

    obj_station = CleanText('proximite', default=NotAvailable)
    obj_url = CleanText('permaLien')
コード例 #27
0
        def _get_high_tide_value(self, AM=True, jour=0):
            """Return a ``GaugeMeasure`` for the high tide of day ``jour``.

            With ``AM`` true the first ``<b>`` entry of the row is used,
            otherwise the second one, provided the row has more than one
            entry. Returns None when no time or no value is found.

            The height is read from the row of day ``jour``, like the time.
            It was previously always read from the ``MareeJours_0`` row, so
            every day reported day 0's height.
            """
            row = '//tr[@id="MareeJours_%s"]' % jour
            if AM:
                time = DateTime(CleanText(row + '/td[1]/b[1]'))(self)
                value = CleanDecimal(row + '/td[2]/b[1]',
                                     replace_dots=True)(self)
            else:
                time, value = None, None
                if len(XPath(row + '/td[1]/b')(self)) > 1:
                    time = DateTime(CleanText(row + '/td[1]/b[2]'),
                                    default=None)(self)
                    value = CleanDecimal(row + '/td[2]/b[2]',
                                         replace_dots=True,
                                         default=None)(self)

            if time and value:
                measure = GaugeMeasure()
                measure.level = float(value)
                # The parsed time is shifted by ``jour`` days.
                measure.date = time + timedelta(days=jour)
                return measure
コード例 #28
0
ファイル: pages.py プロジェクト: guix77/weboob
    class get_current(ItemElement):
        """Declarative item producing a ``Current`` from a nested mapping."""

        klass = Current

        obj_date = DateTime(Dict('vt1currentdatetime/dateTime'))
        obj_id = Env('city_id')
        # Text summary built from several 'vt1observation' entries; "%%"
        # renders a literal percent sign.
        obj_text = Format('%shPa (%s) - humidity %s%% - feels like %s°C - %s',
                          Dict('vt1observation/altimeter'),
                          Dict('vt1observation/barometerTrend'),
                          Dict('vt1observation/humidity'),
                          Dict('vt1observation/feelsLike'),
                          Dict('vt1observation/phrase'))

        def obj_temp(self):
            """Return the observed temperature as a ``Temperature`` in 'C'."""
            temp = Dict('vt1observation/temperature')(self)
            return Temperature(float(temp), 'C')
コード例 #29
0
ファイル: pages.py プロジェクト: guix77/weboob
        class item(ItemElement):
            """Declarative item producing a ``GaugeMeasure`` from one row."""

            klass = GaugeMeasure
            # Raw string: "\d" is not a valid escape in a plain string literal.
            # The pattern itself is unchanged.
            verif = re.compile(r"\d\d.\d\d.\d+ \d\d:\d\d")

            # Reorders "DD.MM.YYYY HH:MM" into "YYYY-MM-DD HH:MM" before
            # parsing.
            obj_date = DateTime(
                Regexp(CleanText('.'), r'(\d+)\.(\d+)\.(\d+) (\d+):(\d+)',
                       r'\3-\2-\1 \4:\5'))
            sensor_types = [u'Level', u'Flow']

            def obj_level(self):
                """Return the value of the current sensor as a float.

                The child element is chosen by the position of the sensor
                name in ``sensor_types``, offset by one. Returns
                ``NotAvailable`` when the text is not a number.
                """
                index = self.sensor_types.index(self.env['sensor'].name) + 1
                try:
                    return float(self.el[index].text_content())
                except ValueError:
                    return NotAvailable
コード例 #30
0
    class fill_paste(ItemElement):
        """Declarative item filling a ``PastebinPaste`` from its page."""

        klass = PastebinPaste

        def parse(self, el):
            """Store the paste info box in the environment as 'header'."""
            self.env['header'] = el.find('//div[@id="content_left"]//div[@class="paste_box_info"]')

        obj_id = Env('id')
        # Title, visibility and date are all resolved relative to the 'header'
        # element stored by parse().
        obj_title = Base(Env('header'), CleanText('.//div[@class="paste_box_line1"]//h1'))
        obj_contents = RawText('//textarea[@id="paste_code"]')
        # Visibility is derived from the title attribute of the first-line
        # image.
        obj_public = Base(
            Env('header'),
            CleanVisibility(Attr('.//div[@class="paste_box_line1"]//img', 'title')))
        # Date is read from the title attribute of the first span of the
        # second line.
        obj__date = Base(
            Env('header'),
            DateTime(Attr('.//div[@class="paste_box_line2"]/span[1]', 'title')))