Esempio n. 1
0
    def obj_url(self):
        """Build the document URL from the JSON entry held in ``self.el``.

        The query string combines a few fixed parameters with values copied
        from the JSON entry. The endpoint and the set of copied values depend
        on whether the entry carries an ``ibanCrypte`` key.

        :return: ``baseurl`` env value + endpoint + url-encoded query string
        """
        # Query parameter name -> key to read in the JSON entry (both layouts).
        field_map = {
            'idDocument': 'idDoc',
            'dateDocument': 'dateDoc',
            'idLocalisation': 'idLocalisation',
            'viDocDocument': 'viDocDocument',
        }

        if 'ibanCrypte' in self.el:
            # JSON with ibancrypte in it: this is the most common case.
            endpoint = 'demat-wspl/rest/consultationDocumentDemat?'
            query = {'typeFamille': 'R001', 'ikpiPersonne': ''}
            field_map.update({
                'typeCpt': 'typeCompte',
                'familleDoc': 'famDoc',
                'ibanCrypte': 'ibanCrypte',
                'typeDoc': 'typeDoc',
                'consulted': 'consulted',
            })
        else:
            # JSON with idcontrat in it: cases present on privee.mabanque,
            # where the doc url is sometimes different.
            endpoint = 'demat-wspl/rest/consultationDocumentSpecialBpfDemat?'
            query = {'ibanCrypte': ''}
            field_map.update({
                'heureDocument': 'heureDoc',
                'numClient': 'numClient',
                'typeReport': 'typeReport',
            })

        # Fixed parameters come first, then the copied ones in mapping order.
        query.update(
            (param, Dict(json_key)(self))
            for param, json_key in field_map.items()
        )

        return Env('baseurl')(self) + endpoint + urlencode(query)
Esempio n. 2
0
        class item(ItemElement):
            """Build a card ``Account`` from one table row."""

            klass = Account

            obj_type = Account.TYPE_CARD
            obj_currency = 'EUR'
            obj_number = CleanText(TableCell('number'))
            obj_label = Format('%s %s', CleanText(TableCell('label')),
                               obj_number)
            obj_id = Format('%s.%s', Env('parent_id'), obj_number)

            def obj_coming(self):
                """Return the first non-empty amount found in the row.

                The ``balance``, ``_credit`` and ``_debit`` columns are tried
                in that order; a missing column or an unparsable value yields
                ``None`` (filter defaults) and is skipped.

                :raises AssertionError: if none of the columns holds a value
                """
                for column in ('balance', '_credit', '_debit'):
                    coming = CleanDecimal(TableCell(column, default=None),
                                          replace_dots=True,
                                          default=None)(self)
                    if not empty(coming):
                        return coming

                # There should be at least 0.00 in the debit column.
                # Raise explicitly: a bare ``assert False`` is removed under
                # ``python -O`` and the method would silently return None.
                raise AssertionError(
                    'no amount found in balance, credit or debit columns')

            def obj_url(self):
                """Return the absolute URL of the first link of the label cell."""
                link = TableCell('label')(self)[0].xpath('.//a')[0]
                return urljoin(self.page.url, link.attrib['href'])
Esempio n. 3
0
    class get_recipe(ItemElement):
        """Fill a ``Recipe`` from the recipe HTML page."""

        klass = Recipe

        obj_id = Env('id')
        obj_title = CleanText('//h1[@class="fn"]')

        def obj_ingredients(self):
            """Return the cleaned text of each ingredient list item."""
            nodes = self.page.doc.xpath(
                '//section[has-class("recette_ingredients")]/ul/li')
            return [CleanText('.')(node) for node in nodes]

        obj_cooking_time = Time(CleanText('//span[@class="cooktime"]'))
        obj_preparation_time = Time(CleanText('//span[@class="preptime"]'))

        def obj_nb_person(self):
            """Return a one-element list with the yield converted to int.

            The ``Type`` filter is given ``default=0`` as fallback value.
            """
            servings = Type(CleanText('//span[@class="yield"]'),
                            type=int,
                            default=0)(self)
            return [servings]

        obj_instructions = CleanHTML(
            '//article[@class="recette_etape"]/h3|//article[@class="recette_etape"]/div[@class="recette_etape_texte"]/*[not(self::article)]'
        )

        obj_picture_url = CleanText(
            '//section[has-class("recette_infos")]/div/img[@class="photo"]/@src'
        )
        obj_author = CleanText('//span[@class="author"]', default=NotAvailable)
Esempio n. 4
0
                class item(ItemElement):
                    """Build a ``GaugeSensor`` from one JSON entry and the env values."""

                    klass = GaugeSensor

                    # Display name looked up in SENSOR_NAMES from the entry's "key".
                    obj_name = Map(Dict('key'), SENSOR_NAMES)
                    obj_gaugeid = Env('nom_court_sit')
                    # Sensor id is "<gauge id>.<key>".
                    obj_id = Format('%s.%s', obj_gaugeid, Dict('key'))
                    obj_unit = 'µg/m³'

                    class obj_lastvalue(ItemElement):
                        """Build the ``GaugeMeasure`` stored as the sensor's last value."""

                        klass = GaugeMeasure

                        # Timestamp parsed from the "min_donnees" and "date"
                        # env values joined by a space.
                        obj_date = DateTime(
                            Format(
                                '%s %s',
                                Env('min_donnees'),
                                Env('date'),  # "date" contains the time...
                            )
                        )
                        obj_level = CleanDecimal(Dict('value'))

                    class obj_geo(ItemElement):
                        """Build the ``GeoCoordinates`` from the env latitude/longitude."""

                        klass = GeoCoordinates

                        obj_latitude = CleanDecimal(Env('latitude'))
                        obj_longitude = CleanDecimal(Env('longitude'))

                    class obj_location(ItemElement):
                        """Build the ``PostalAddress`` from env values and fixed region/country."""

                        klass = PostalAddress

                        obj_street = Env('adresse')
                        # NOTE(review): the postal code is fed from the "ninsee"
                        # env value - presumably an INSEE code; confirm.
                        obj_postal_code = Env('ninsee')
                        obj_city = Env('city')
                        obj_region = 'Ile-de-France'
                        obj_country = 'France'
Esempio n. 5
0
        class item(ItemElement):
            """Build the ``Subscription`` described by the page."""

            klass = Subscription

            obj_label = CleanText('//span[@class="ecconumteleule"]')
            obj_subscriber = CleanText(
                '//span[@class="economligneaseule eccobold"]')
            obj_id = Env('id')
            obj__contract = Env('contract')

            def parse(self, el):
                """Compute the ``id`` and ``contract`` env values used by the fields.

                ``id`` is the text of the first ``ecconumteleule`` span with
                every character other than digits, ``-`` and ``.`` removed.
                ``contract`` is the number assigned to ``tc_vars["ID_contrat"]``
                in ``self.page.data``.

                :raises IndexError: if the span is not found
                :raises AttributeError: if the contract assignment is not found
                """
                self.env['id'] = re.sub(
                    r'[^\d\-\.]', '',
                    el.xpath('//span[@class="ecconumteleule"]')[0].text)
                # Raw string: the former plain literal relied on the invalid
                # escape sequences "\[" and "\]", which recent Python versions
                # warn about. The compiled pattern matches the same text.
                self.env['contract'] = re.search(
                    r"tc_vars\[\"ID_contrat\"\] = '([0-9]+)'",
                    self.page.data).group(1)
Esempio n. 6
0
        class item(ItemElement):
            """Build a ``BaseAudio`` from one JSON entry."""

            klass = BaseAudio

            def condition(self):
                """Return the entry's ``path_mp3`` value; its truthiness gates the item."""
                return Dict('path_mp3')(self)

            obj_id = BaseAudioIdFilter(Format(u'%s.%s', Env('radio_id'), Dict('nid')))
            obj_format = u'mp3'
            obj_ext = u'mp3'

            obj_title = Format(u'%s : %s',
                               Dict('title_emission'),
                               Dict('title_diff'))
            obj_description = Dict('desc_emission', default=u'')

            obj_author = Join(u', ', Dict('personnes', default=u''))
            obj_url = Dict('path_mp3')

            def obj_thumbnail(self):
                """Return a ``Thumbnail`` for ``path_img_emission``, or None without it."""
                if 'path_img_emission' not in self.el:
                    return None

                image = Thumbnail(Dict('path_img_emission')(self))
                # The thumbnail id doubles as its URL.
                image.url = image.id
                return image

            def obj_duration(self):
                """Return ``fin - debut`` as a ``timedelta``, or None if either is falsy."""
                end = Dict('fin')(self)
                start = Dict('debut')(self)
                if not (start and end):
                    return None
                return timedelta(seconds=int(end) - int(start))
Esempio n. 7
0
    class get_recipe(ItemElement):
        """Fill a ``Recipe`` from the page's ``application/ld+json`` script."""

        klass = Recipe

        def parse(self, el):
            """Replace ``self.el`` with the decoded JSON of the first ld+json script.

            The CDATA opening and closing markers are removed from the script
            text before decoding.
            """
            scripts = XPath(u'//script[@type="application/ld+json"]')(self)

            cdata_markers = [('//<![CDATA[ ', ''), (' //]]>', '')]
            raw_json = CleanText(u'.', replace=cdata_markers)(scripts[0])
            self.el = json.loads(raw_json)

        obj_id = Env('id')
        obj_title = Dict('name')
        obj_ingredients = Dict('recipeIngredient')

        class obj_picture(ItemElement):
            """Build the ``BaseImage`` of the recipe from the JSON ``image`` value."""

            klass = BaseImage

            obj_url = Dict('image')
            obj_thumbnail = Eval(Thumbnail, obj_url)

        def obj_instructions(self):
            """Return all instruction texts, each rendered as ``" - <text>"`` plus a blank line."""
            text = ''
            for step in Dict('recipeInstructions')(self):
                # Each step is appended after what has been accumulated so far.
                text = u"{0} - {1}\n\n".format(text, step['text'])
            return text

        obj_preparation_time = Eval(int, CleanDecimal(Dict('prepTime')))
        obj_cooking_time = Eval(int, CleanDecimal(Dict('cookTime')))

        def obj_nb_person(self):
            """Return a one-element list holding the JSON ``recipeYield`` value."""
            servings = Dict('recipeYield')(self)
            return [servings]
Esempio n. 8
0
        class item(ItemElement):
            """Build a ``Bill`` from one row of the bills table."""

            klass = Bill

            # Id is "facture-<date>-<amount>-<type>#<sub_id>", each cell slugified.
            obj_id = Format('facture-%s-%s-%s#%s',
                            Slugify(CleanText(TableCell('date'))),
                            Slugify(CleanText(TableCell('amount'))),
                            Slugify(CleanText(TableCell('type'))),
                            Env('sub_id'))
            obj_url = AbsoluteLink('./td[5]//a', default=NotAvailable)
            obj_date = Date(CleanText(TableCell('date')), dayfirst=True)
            obj_label = Format('%s %s %s', CleanText(TableCell('type')),
                               CleanText(TableCell('amount')),
                               CleanText(TableCell('date')))
            obj_type = DocumentTypes.BILL
            obj_price = CleanDecimal(TableCell('amount'), replace_dots=True)
            obj_currency = Currency(TableCell('amount'))
            # Due date is read from the "le dd/mm/yyyy" part of the status cell.
            obj_duedate = Date(Regexp(CleanText(TableCell('status')),
                                      r'le (\d+)/(\d+)/(\d+)', r'\1/\2/\3'),
                               dayfirst=True)

            def obj_format(self):
                """Return ``'pdf'`` when the row has a link, else ``NotAvailable``."""
                return 'pdf' if self.obj_url(self) else NotAvailable

            def obj_income(self):
                """Return True when the parsed price is negative."""
                return self.obj_price(self) < 0
Esempio n. 9
0
    class get_job_advert(ItemElement):
        """Fill a ``BaseJobAdvert`` from the block following the advert's anchor."""

        klass = BaseJobAdvert

        obj_url = Format('%s#%s', Env('url'), Env('id'))
        obj_description = Join('%s\r\n',
                               'div/fieldset/*[(@class="titreParagraphe" or @class="normal")]',
                               textCleaner=CleanHTML)
        obj_title = CleanText('div/span[@class="intituleposte"]')
        obj_job_name = CleanText('div/span[@class="intituleposte"]')
        obj_society_name = Format('CCI %s', CleanText('div/span[@class="crci crcititle"]'))
        obj_publication_date = DateTime(CleanText('div/fieldset/p[@class="dateOffre"]'), dayfirst=True)

        def parse(self, el):
            """Point ``self.el`` at the advert block and expose ``url``/``id`` in env.

            The block is the first ``div`` sibling following the ``<a>`` whose
            ``name`` is the id of the object being filled.

            :raises IndexError: if no such block exists
            """
            anchor_xpath = "//a[@name='%s']/following-sibling::div[1]" % self.obj.id
            self.el = el.xpath(anchor_xpath)[0]

            self.env['url'] = self.page.url
            self.env['id'] = self.obj.id
Esempio n. 10
0
 def parse(self, el):
     """Select the bills list matching the subscription and record its ``bpNumber``.

     Every account of every group in ``self.el`` is inspected; when an
     account number is contained in the ``subid`` env value, ``item_xpath``
     is pointed at that account's ``listOfbills`` and the group's
     ``bpDto/bpNumber`` is stored in ``self.env['bpNumber']``.
     """
     for group_idx, group in enumerate(self.el):
         accounts = Dict('listOfBillsByAccDTO')(group)
         for account_idx, account in enumerate(accounts):
             if Dict('accDTO/numAcc')(account) not in Env('subid')(self):
                 continue

             self.item_xpath = "%d/listOfBillsByAccDTO/%d/listOfbills" % (group_idx, account_idx)
             self.env['bpNumber'] = Dict('%d/bpDto/bpNumber' % group_idx)(self)
             # NOTE(review): this only leaves the inner loop; later groups
             # are still scanned and a later match would overwrite these
             # values - confirm whether stopping at the first match is meant.
             break
Esempio n. 11
0
 def obj_shop(self):
     """Build the ``Shop`` of the advert from the seller contact block.

     The shop id is the ``_id`` env value; name, location and info are
     extracted from the page with the filters below.
     """
     shop = Shop(Env('_id')(self))

     name_filter = Regexp(
         CleanText('(//div[@xtcz="contacter_le_vendeur"]/div/ul/li)[1]'),
         'Nom : (.*)')
     location_filter = JSVar(CleanText('//script'), var='tooltip')
     info_filter = CleanText(
         '//div[@xtcz="contacter_le_vendeur"]/div/ul/li[has-class("printPhone")]')

     shop.name = name_filter(self)
     shop.location = location_filter(self)
     shop.info = info_filter(self)
     return shop
Esempio n. 12
0
 def next_page(self):
     """Return the URL of the next results page, or None when there is none.

     The page number is read from the ``pg=`` parameter of the
     ``<link rel="next">`` href.

     :return: the ``adv_search`` URL for the next page, or None if the page
              has no next link or the link carries no page number
     """
     page = Regexp(CleanText('//link[@rel="next"]/@href', default=''),
                   r'.*pg=(\d*)',
                   default=None)(self)

     # ``page`` is None without a next link and '' when "pg=" has no digits;
     # ``int()`` would raise on both, so stop here instead.
     # NOTE(review): assumes the caller treats None as "no more pages" - confirm.
     if not page:
         return None

     return BrowserURL('adv_search',
                       search=Env('search'),
                       page=int(page))(self)
Esempio n. 13
0
    class get_recipe(ItemElement):
        """Fill a ``Recipe`` from the recipe HTML page."""

        klass = Recipe

        obj_id = Env('id')
        obj_title = CleanText('//h1[has-class("m_title")]')
        obj_preparation_time = Type(CleanText('//span[@class="preptime"]'),
                                    type=int)
        obj_cooking_time = Type(CleanText('//span[@class="cooktime"]'),
                                type=int)

        def obj_nb_person(self):
            """Return ``[count]`` from "(pour N personnes)", or ``NotAvailable``.

            ``NotAvailable`` is returned when the text is not found (the
            regexp default is 0) or when the extracted value is falsy.
            """
            # Raw string: "\(", "\d" and "\)" are invalid escape sequences in
            # a plain literal; the pattern itself is unchanged.
            nb_pers = Regexp(
                CleanText(
                    '//div[@class="m_content_recette_ingredients m_avec_substitution"]/span[1]'
                ),
                r'.*\(pour (\d+) personnes\)',
                default=0)(self)
            return [nb_pers] if nb_pers else NotAvailable

        def obj_ingredients(self):
            """Return the ingredients block split on ``-``, without the first chunk.

            An empty list is returned when the block is missing or contains
            no ``-`` separator.
            """
            ingredients = CleanText(
                '//div[@class="m_content_recette_ingredients m_avec_substitution"]',
                default='')(self).split('-')
            if len(ingredients) > 1:
                return ingredients[1:]
            return []

        obj_instructions = CleanHTML('//div[@class="m_content_recette_todo"]')
        # Thumbnail and picture both use the same illustration image source.
        obj_thumbnail_url = CleanText(
            '//a[@class="m_content_recette_illu"]/img/@src',
            default=NotAvailable)
        obj_picture_url = CleanText(
            '//a[@class="m_content_recette_illu"]/img/@src',
            default=NotAvailable)
Esempio n. 14
0
        class item(ItemElement):
            """Build a ``Housing`` from one offer block of a results page."""

            klass = Housing

            # Id is "<type prefix>-<offer id>": the part of the "type" env value
            # before its last "-" and the element id without "header-offer-".
            obj_id = Format(
                '%s-%s', Regexp(Env('type'), '(.*)-.*'),
                CleanText('./@id', replace=[('header-offer-', '')]))
            obj_title = CleanText(
                './div/div/div[@class="offer-details-wrapper"]/div/div/p[@class="offer-type"]/span/@title'
            )
            obj_area = CleanDecimal(
                './div/div/div[@class="offer-details-wrapper"]/div/div/div/div/h3/a/span[@class="offer-area-number"]',
                default=0)
            # Cost is the text preceding the currency symbol; 0 when missing.
            obj_cost = CleanDecimal(
                Regexp(
                    CleanText(
                        './div/div/div[@class="offer-details-wrapper"]/div/div/p[@class="offer-price"]/span',
                        default=NotAvailable),
                    '(.*) [%s%s%s]' % (u'€', u'$', u'£'),
                    default=NotAvailable),
                default=0)
            # Currency symbol at the end of the price text; defaults to euro.
            obj_currency = Regexp(
                CleanText(
                    './div/div/div[@class="offer-details-wrapper"]/div/div/p[@class="offer-price"]/span',
                    default=NotAvailable),
                '.* ([%s%s%s])' % (u'€', u'$', u'£'),
                default=u'€')
            # Raw string: "\d" is an invalid escape sequence in a plain
            # literal; the pattern itself is unchanged.
            obj_date = Date(
                Regexp(
                    CleanText(
                        './div/div/div[has-class("offer-picture-more")]/div/p[@class="offer-update"]'
                    ), r".*(\d{2}/\d{2}/\d{4}).*"))
            obj_text = CleanText(
                './div/div/div[@class="offer-details-wrapper"]/div/div/div/p[has-class("offer-description")]/span'
            )
            obj_location = CleanText(
                './div/div/div[@class="offer-details-wrapper"]/div/div/div/div/h2'
            )
Esempio n. 15
0
    class get_story(ItemElement):
        """Fill a ``Story`` (and its ``Author``) from the story page."""

        klass = Story

        obj_id = Env('id')
        obj_title = CleanText('//h1')
        # "le dd-mm-yyyy" is reordered to "yyyy-mm-dd" before date parsing.
        obj_date = CleanText('//span[has-class("t4")]') & Regexp(
            pattern=r'le (\d+)-(\d+)-(\d+)', template=r'\3-\2-\1') & Date
        obj_category = CleanText('//a[starts-with(@href, "histoires-cat")]')

        def obj_body(self):
            """Return the story text gathered around the ``<br>`` children.

            For each ``<br>`` child of the first justified ``div``, its
            stripped text (if any), a newline and its stripped tail (if any)
            are concatenated. ``\\x92`` characters are replaced by an
            apostrophe and the result is stripped.
            """
            container = self.el.xpath('//div[@align="justify"]')[0]

            chunks = []
            for line_break in container.findall('br'):
                if line_break.text is not None:
                    chunks.append(line_break.text.strip())
                chunks.append('\n')
                if line_break.tail is not None:
                    chunks.append(line_break.tail.strip())

            return ''.join(chunks).replace(u'\x92', "'").strip()

        class obj_author(ItemElement):
            """Build the ``Author`` of the story from the same page."""

            klass = Author

            obj_name = CleanText('//a[starts-with(@href, "fiche.php")][2]')
            obj_sex = CleanText('//td[has-class("t0")]') & Regexp(
                pattern=r"Auteur (\w+)") & Author.Sex2Enum
Esempio n. 16
0
 def obj_investment(self):
     """Return the cached investment whose label appears in the preceding section.

     Candidates are the investments cached by the browser under
     ``cache['invs']`` for the ``accid`` env value. An investment matches
     when its label is contained in the text of the ``tbody`` preceding the
     parent ``tbody`` of the current element; if several match, the last
     one is returned.

     :raises AssertionError: if no cached investment matches
     """
     candidates = self.page.browser.cache['invs'][Env('accid')(self)]
     # The section text does not depend on the candidate: evaluate it once.
     section_text = CleanText('./parent::tbody/preceding-sibling::tbody[1]')(self)

     investment = None
     for inv in candidates:
         if inv.label in section_text:
             # No break on purpose: the last match wins, as before.
             investment = inv

     # Explicit raise instead of ``assert``, which is removed under
     # ``python -O`` and would let None through.
     if investment is None:
         raise AssertionError('no cached investment matches this row')
     return investment
Esempio n. 17
0
        class item(ItemElement):
            """Build an ``AgendaculturelEvent``, kept only if it passes date and category checks."""

            klass = AgendaculturelEvent

            def validate(self, obj):
                """Return whether ``obj`` passes both the date and the category check."""
                return self.check_date(obj) and self.check_category(obj)

            def check_date(self, obj):
                """Return True when ``obj`` fits the ``date_from``/``date_to`` env window.

                A set ``date_from`` not later than the event start is required.
                Then the event is accepted if ``date_to`` is unset, if the
                event ends on or before ``date_to``, or if it starts on or
                before ``date_to``.
                """
                date_from = self.env['date_from']
                if not (date_from and obj.start_date >= date_from):
                    return False

                date_to = self.env['date_to']
                if not date_to:
                    return True
                if obj.end_date and obj.end_date <= date_to:
                    return True
                if date_to >= obj.start_date:
                    return True
                return False

            def check_category(self, obj):
                """Return True when no category filter is set or ``obj.category`` is in it."""
                wanted = self.env['categories']
                return not wanted or obj.category in wanted

            # Id is "<region>.<path>", the path being the href part between
            # the first "/" and "html" as captured by the regexp.
            obj_id = Format(
                '%s.%s', Env('region'),
                Regexp(CleanText('./div/a[@itemprop="url"]/@href'),
                       '/(.*).html'))
            obj_summary = CleanText('./div/a[@itemprop="url"]')

            def obj_start_date(self):
                """Return the ``startDate`` meta content as a datetime at midnight."""
                start_day = Date(
                    CleanText('./meta[@itemprop="startDate"]/@content'))(self)
                return datetime.combine(start_day, time.min)

            obj_category = AgendaculturelCategory(
                Regexp(CleanText('./@itemtype'), 'http://schema.org/(.*)'))
Esempio n. 18
0
        class item(ItemElement):
            """Build a ``Bill`` from one JSON entry."""

            klass = Bill

            obj_date = Date(Dict('dueDate'),
                            parse_func=parse_french_date,
                            default=NotAvailable)
            obj_price = CleanDecimal(Dict('amountIncludingTax'))
            obj_format = 'pdf'

            def obj_label(self):
                """Return ``"Facture du <date>"`` built from the ``date`` field."""
                bill_date = Field('date')(self)
                return 'Facture du %s' % bill_date

            def obj_id(self):
                """Return ``"<subid>_<ddmmYYYY>"`` from the env ``subid`` and the ``date`` field."""
                sub_id = Env('subid')(self)
                day_stamp = Field('date')(self).strftime('%d%m%Y')
                return '%s_%s' % (sub_id, day_stamp)

            def get_params(self):
                """Return the url-encoded ``billid``/``billDate`` query of this entry."""
                return urlencode({
                    'billid': Dict('id')(self),
                    'billDate': Dict('dueDate')(self)
                })

            # ``get_params`` is passed as a callable evaluated against the item.
            obj_url = BrowserURL('doc_api_pro',
                                 subid=Env('subid'),
                                 dir=Dict('documents/0/mainDir'),
                                 fact_type=Dict('documents/0/subDir'),
                                 billparams=get_params)
            obj__is_v2 = False
Esempio n. 19
0
        class item(ItemElement):
            """Build a ``Bill`` from one teaser row."""

            klass = Bill

            # Id is "<subscription>#<element id attribute>".
            obj_id = Format(
                '%s#%s',
                Env('subscription'),
                Attr('.', 'id')
            )

            obj_price = CleanDecimal('.//span[has-class("nbPrice")]',
                                     replace_dots=(',', '€'))

            obj_currency = "€"

            def obj_income(self):
                """Return True unless the price text starts with the minus sign ``−``."""
                price_text = CleanText('.//span[has-class("nbPrice")]')(self)
                is_negative = price_text.startswith('−')
                return not is_negative

            obj_label = CleanText('.//p[has-class("TeaserRow-desc")]')
            obj_date = Date(CleanText('.//p[has-class("TeaserRow-date")]'),
                            dayfirst=True)
            # The due date reuses the very same filter as the date.
            obj_duedate = obj_date
            obj_format = "pdf"

            def obj_url(self):
                """Return the absolute download URL, or ``NotAvailable`` without a link."""
                try:
                    href = Link('.//a[has-class("Download")]')(self)
                except XPathNotFound:
                    return NotAvailable
                return urljoin(self.page.browser.BASEURL, href)
Esempio n. 20
0
        class account(ItemElement):
            """Build a loan ``Account`` from one table row."""

            klass = Account

            # The sign callback always returns -1.
            obj_balance = CleanDecimal(TableCell('balance'),
                                       replace_dots=True,
                                       sign=lambda x: -1)
            obj_currency = FrenchTransaction.Currency(TableCell('balance'))
            obj_type = Account.TYPE_LOAN
            obj_id = Env('id')

            def obj_label(self):
                """Return the label from the row, or from the heading before the table.

                When an ancestor table has a header containing "Type", the
                label is the second cell of the row. Otherwise it is the part
                before the first `` - `` in the ``div`` preceding the table.
                """
                has_type = CleanText(
                    './ancestor::table[.//th[contains(text(), "Type")]]',
                    default=None)(self)
                if has_type:
                    return CleanText('./td[2]')(self)

                heading = CleanText(
                    './ancestor::table/preceding-sibling::div[1]')(self)
                return heading.split(' - ')[0]

            def parse(self, el):
                """Compute the ``id`` env value for this row.

                The id concatenates the two words of the ``id`` cell (around
                `` - ``), the label without spaces, and the position of ``el``
                among the candidate rows (0 when it is not among them).
                """
                label = Field('label')(self)
                rows = self.xpath(
                    '//td[contains(text(), "%s")]/ancestor::tr[1] | ./ancestor::table[1]/tbody/tr'
                    % label)
                # Index of the first candidate row equal to ``el``, else 0.
                position = next(
                    (idx for idx, row in enumerate(rows) if el == row), 0)

                compact_label = label.replace(' ', '')
                reference = Regexp(
                    CleanText(TableCell('id')), r'(\w+)\s-\s(\w+)',
                    r'\1\2')(self)
                self.env['id'] = "%s%s%s" % (reference, compact_label, position)
Esempio n. 21
0
 def obj_date(self):
     """Parse the date of the first cell according to the length of its text.

     A 10-character text is parsed as a full day-first date, a
     5-character one is handed to ``DateGuesser`` with the ``date_guesser``
     env value. Any other length yields None.
     """
     raw_date = CleanText('./td[1]/font//text()')(self)
     size = len(raw_date)

     if size == 10:
         return Date(CleanText('./td[1]/font//text()'), dayfirst=True)(self)
     if size == 5:
         # Date has no indicated year.
         return DateGuesser(CleanText('./td[1]//text()'), Env('date_guesser'))(self)
     return None
Esempio n. 22
0
        class item(ItemElement):
            """Build a ``Bill`` from one order block."""

            klass = Bill

            obj__simple_id = CleanText(
                './/div[has-class("actions")]//span[has-class("value")]')
            obj_id = Format('%s_%s', Env('subid'), Field('_simple_id'))
            obj_url = Format(
                '/gp/css/summary/print.html/ref=oh_aui_ajax_pi?ie=UTF8&orderID=%s',
                Field('_simple_id'))
            obj_format = 'html'
            obj_label = Format('Facture %s', Field('_simple_id'))
            obj_type = 'bill'

            def obj_date(self):
                """Parse the date from the value span that does not contain the currency."""
                currency = Env('currency')(self)
                date_xpath = (
                    './/div[has-class("a-col-left")]//span[has-class("value") and not(contains(., "%s"))]'
                    % currency)
                return parse_french_date(CleanText(date_xpath)(self))

            def obj_price(self):
                """Parse the amount from the value span that contains the currency.

                ``replace_dots`` is enabled only when the currency is ``EUR``.
                """
                currency = Env('currency')(self)
                price_xpath = (
                    './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
                    % currency)
                replace_dots = currency == u'EUR'
                return CleanDecimal(price_xpath,
                                    replace_dots=replace_dots)(self)

            def obj_currency(self):
                """Extract the currency from the value span that contains it."""
                currency = Env('currency')(self)
                price_xpath = (
                    './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
                    % currency)
                return Currency(price_xpath)(self)
Esempio n. 23
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from the advert detail page."""

        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText(
            '//h1[@class="desc clearfix"]/span[@class="title"]')
        obj_cost = CleanDecimal(
            '//h1[@class="desc clearfix"]/span[@class="prix"]')
        # Currency symbol found in the price text; defaults to euro.
        obj_currency = Regexp(
            CleanText('//h1[@class="desc clearfix"]/span[@class="prix"]'),
            '.*([%s%s%s])' % (u'€', u'$', u'£'),
            default=u'€')
        # Area is the number placed before " m²" in the title.
        # Raw strings: "\d" is an invalid escape sequence in a plain literal.
        # ``\xb2`` is now resolved by the regex engine and still matches "²".
        obj_area = CleanDecimal(
            Regexp(
                CleanText('//h1[@class="desc clearfix"]/span[@class="title"]'),
                r'(.*?)(\d*) m\xb2(.*?)', r'\2'),
            default=NotAvailable)
        obj_location = CleanText('//div[@class="text-annonce"]/h2')
        obj_text = CleanHTML('//div[@class="text-annonce"]/p')
        obj_station = CleanText('//div[@class="metro"]')
        obj_phone = CleanText('//span[@class="telephone hide-tel"]')
        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_details(self):
            """Return a dict of the advert's labelled details plus the energy class.

            Each footer list item contributes ``label -> text without label``
            when both are non-empty. The energy entry is formatted as
            ``"<text>(<css class with '-' replaced by a space>)"``.
            """
            details = {}
            for item in XPath('//div[@class="footer-descriptif"]/ul/li')(self):
                key = CleanText('./span[@class="label"]')(item)
                # The item text includes the label: strip it out of the value.
                value = CleanText('.', replace=[(key, '')])(item)
                if value and key:
                    details[key] = value

            key = CleanText(
                '//div[@class="classe-energie-content"]/div/div/span')(self)
            value = Format(
                '%s(%s)',
                CleanText('//div[@class="classe-energie-content"]/div/div/p'),
                CleanText('//div[@class="classe-energie-content"]/div/@class',
                          replace=[('-', ' ')]))(self)
            if value and key:
                details[key] = value
            return details

        def obj_photos(self):
            """Return one ``HousingPhoto`` per thumbnail image source."""
            sources = XPath('//div[@class="showcase-thumbnail"]/img/@src')(self)
            return [HousingPhoto(u'%s' % src) for src in sources]
Esempio n. 24
0
    class get_housing(ItemElement):
        """Fill a ``Housing`` from the advert detail page."""

        klass = Housing

        obj_id = Env('_id')
        obj_title = CleanText('//h1[@class="clearfix"]/span[@class="title"]')
        obj_cost = CleanDecimal('//h1[@class="clearfix"]/span[@class="price"]',
                                replace_dots=True)
        # Currency symbol found in the price text; defaults to euro.
        obj_currency = Regexp(
            CleanText('//h1[@class="clearfix"]/span[@class="price"]'),
            '.*([%s%s%s])' % (u'€', u'$', u'£'),
            default=u'€')
        # Area is the number placed before " m²" in the title.
        # Raw strings: "\d" is an invalid escape sequence in a plain literal.
        # ``\xb2`` is now resolved by the regex engine and still matches "²".
        obj_area = CleanDecimal(
            Regexp(
                CleanText('//h1[@class="clearfix"]/span[@class="title"]'),
                r'(.*?)(\d*) m\xb2(.*?)', r'\2'),
            default=NotAvailable)
        obj_price_per_meter = PricePerMeterFilter()
        obj_location = CleanText('//div[@class="item-geoloc"]/h2')
        obj_text = CleanText(CleanHTML('//p[@class="item-description"]'))
        obj_station = CleanText('//div[@class="metro"]')
        obj_phone = CleanHTML('(//div[has-class("tel-wrapper")])[1]')
        obj_url = BrowserURL('housing', _id=Env('_id'))

        def obj_details(self):
            """Return a dict of the advert's summary entries plus the energy rank.

            Each summary item contributes ``own text -> <strong> text`` when
            both are non-empty. The energy entry is formatted as
            ``"<text>(<css class without '-' and 'rank'>)"``.
            """
            details = {}
            for item in XPath('//ul[@class="item-summary"]/li')(self):
                # The key is the item's own text, children excluded.
                key = CleanText('.', children=False)(item)
                value = CleanText('./strong')(item)
                if value and key:
                    details[key] = value

            key = CleanText(
                '//div[@class="box energy-box"]/div/div/p[@class="h3"]')(self)
            value = Format(
                '%s(%s)',
                CleanText('(//div[@class="box energy-box"]/div/div/p)[2]'),
                CleanText('//div[@class="box energy-box"]/div/div/@class',
                          replace=[('-', ''), ('rank', '')]))(self)
            if value and key:
                details[key] = value
            return details

        def obj_photos(self):
            """Return one ``HousingPhoto`` per thumbnail image source."""
            sources = XPath(
                '//div[has-class("showcase-thumbnail")]/img/@src')(self)
            return [HousingPhoto(u'%s' % src) for src in sources]
Esempio n. 25
0
    class get_video(ItemElement):
        """Fill a ``BaseVideo`` from the video page."""

        klass = BaseVideo

        obj_id = Env('id')
        obj_title = CleanText('//title')
        obj_nsfw = True
        obj_ext = u'mp4'
        # The mp4 URL is taken from the script text, where it appears with
        # backslash-escaped slashes ("https:\/\/..."); the captured value then
        # has its backslashes removed.
        obj_url = CleanText('//script') & Regexp(pattern=r'(https:\\/\\/[^"]+\.mp4[^"]+)"') & CleanText(replace=[('\\', '')])
Esempio n. 26
0
    class get_job_advert(ItemElement):
        """Fill a ``BaseJobAdvert`` from the advert page."""

        klass = BaseJobAdvert

        # Id is the "_id" env value prefixed with "#".
        obj_id = Format('#%s', Env('_id'))
        obj_url = BrowserURL('advert', _id=Env('_id'))
        # Title and description each accept two alternative page layouts
        # (XPath union).
        obj_title = CleanText(
            '//div[@id="jobcopy"]/h1[@itemprop="title"]|//div[@itemprop="title"]/h1'
        )
        obj_description = CleanHTML(
            '//div[@id="jobBodyContent"]|//div[@itemprop="description"]')
        # Every <dd> whose class starts with "multipledd", joined by a space.
        obj_contract_type = Join(u' ',
                                 '//dd[starts-with(@class, "multipledd")]')
        obj_society_name = CleanText('//dd[@itemprop="hiringOrganization"]')
        obj_place = CleanText('//span[@itemprop="jobLocation"]')
        obj_pay = CleanText('//span[@itemprop="baseSalary"]')
        obj_formation = CleanText('//span[@itemprop="educationRequirements"]')
        obj_experience = CleanText('//span[@itemprop="qualifications"]')
Esempio n. 27
0
    class get_thread(ItemElement):
        """Fill a ``Thread`` from the permalink tweet container."""

        klass = Thread

        # Id is "<user>#<_id>" from the env values.
        obj_id = Format('%s#%s', Env('user'), Env('_id'))
        # Title is the link text and the paragraph text of the container,
        # with the space after "@", "#" and "http://" removed.
        obj_title = Format(
            '%s \n\t %s',
            CleanText(
                '//div[@class="permalink-inner permalink-tweet-container"]/div/div/div/a',
                replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]),
            CleanText(
                '//div[@class="permalink-inner permalink-tweet-container"]/div/div/p',
                replace=[('@ ', '@'), ('# ', '#'), ('http:// ', 'http://')]))
        # The "<time> ... - <date ending with a 4-digit year>" text is
        # reordered to "<date> <time>" before parsing.
        # Raw strings: "\d" is an invalid escape sequence in a plain literal;
        # the pattern itself is unchanged.
        obj_date = DateTime(
            Regexp(
                CleanText(
                    '//div[@class="permalink-inner permalink-tweet-container"]/div/div/div[@class="client-and-actions"]/span/span'
                ), r'(\d+:\d+).+- (.+\d{4})', r'\2 \1'),
            translations=DATE_TRANSLATE_FR)
Esempio n. 28
0
        class item(ItemElement):
            """Build a ``Collection`` from one JSON entry."""

            klass = Collection

            # The JSON key holding the title is itself given by the "title" env value.
            obj_title = Dict(CleanText(Env('title')))
            obj_id = Dict('clusterId')

            def obj_split_path(self):
                """Return the path ``[<program id>, <clusterId>]`` of the collection."""
                program_id = SITE.PROGRAM.get('id')
                cluster_id = Dict('clusterId')(self)
                return [program_id, cluster_id]
Esempio n. 29
0
        class item(ItemElement):
            """Build a ``Document`` from one JSON entry."""

            klass = Document

            # Id is "<subscription>_<reference>".
            obj_id = Format("%s_%s", Env("subscription"), Dict("reference"))
            obj_format = "pdf"
            obj_date = Date(Dict("datePrelevement"))
            # Period is the year and month ("YYYYMM") of the "YYYY-MM-DD" date.
            obj__period = Regexp(Dict("datePrelevement"),
                                 r"(\d{4})-(\d{2})-(\d{2})", "\\1\\2")
            obj_label = Format("Prélèvement du %s", Field("date"))
            obj_type = DocumentTypes.OTHER
            # Download URL built from the env employer, the entry's reference
            # and origin type, and the period computed above.
            obj_url = BrowserURL(
                "direct_debit_download",
                employer=Env("employer"),
                reference=Dict("reference"),
                period=Field("_period"),
                type=Dict("typeOrigine"),
            )
Esempio n. 30
0
 def obj_price(self):
     """Parse the amount from the value span containing the currency.

     ``replace_dots`` is enabled only when the currency is ``EUR``;
     ``NotAvailable`` is the filter's default value.
     """
     # Some orders, audiobooks for example, are paid using "audio credits",
     # they have no price or currency
     currency = Env('currency')(self)
     price_xpath = (
         './/div[has-class("a-col-left")]//span[has-class("value") and contains(., "%s")]'
         % currency)
     replace_dots = currency == 'EUR'
     return CleanDecimal(price_xpath,
                         replace_dots=replace_dots,
                         default=NotAvailable)(self)