Example #1
0
 def obj_type(self):
     """Return the POSTS_TYPES value matching the housing URL and page content.

     Falls back to NotAvailable when the URL matches none of the known
     keywords.
     """
     page_url = BrowserURL('housing', _id=Env('_id'))(self)

     # 'colocation' is tested first because it also contains 'location'.
     if 'colocation' in page_url:
         return POSTS_TYPES.SHARING

     if 'location' in page_url:
         furnished = False
         for entry in XPath('//ul[@itemprop="description"]/li')(self):
             criterion = CleanText('./div[has-class("criteria-label")]')(entry)
             if criterion.lower() != "meublé":
                 continue
             answer = CleanText('./div[has-class("criteria-value")]')(entry)
             furnished = answer.lower() == 'oui'
         return POSTS_TYPES.FURNISHED_RENT if furnished else POSTS_TYPES.RENT

     if 'vente' in page_url:
         transaction = Attr(
             '//button[has-class("offer-contact-vertical-phone")][1]',
             'data-offertransactiontype'
         )(self)
         # Transaction type '4' is mapped to a life-annuity sale.
         return POSTS_TYPES.VIAGER if transaction == '4' else POSTS_TYPES.SALE

     return NotAvailable
Example #2
0
 def obj_photos(self):
     """Return the list of HousingPhoto built from the LienImage<N> nodes.

     The number of photos is read from the NbPhotos node. Thumbnail size
     parameters are swapped for larger ones, and relative links are
     prefixed with the site host.
     """
     # CleanDecimal yields a Decimal, which range() rejects on Python 3:
     # coerce it to a plain int first.
     count = int(CleanDecimal('//NbPhotos')(self))

     photos = []
     for i in range(1, count + 1):
         img = CleanText('//LienImage%s' % i, replace=[(u'w=69&h=52', u'w=786&h=481')])(self)
         url = img if img.startswith('http') else u'http://www.entreparticuliers.com%s' % img
         photos.append(HousingPhoto(url))
     return photos
Example #3
0
            def parse(self, el):
                """Fill env 'rooms', 'bedrooms' and 'area' from the item-tags entries.

                The three keys start as NotLoaded and are overwritten by the
                entries found under the element.
                """
                tags = el.xpath(
                    './/div[@class="clearfix"]/ul[has-class("item-tags")]/li'
                )
                for key in ('rooms', 'bedrooms', 'area'):
                    self.env[key] = NotLoaded

                for tag in tags:
                    text = CleanText('.')(tag).lower()
                    if 'chambre' in text:
                        self.env['bedrooms'] = CleanDecimal('./strong')(tag)
                    elif 'pièce' in text:
                        self.env['rooms'] = CleanDecimal('./strong')(tag)
                    else:
                        # Every other entry is read as the area: the leading
                        # number of its text is extracted.
                        leading_number = Regexp(CleanText('.'), r'(\d*\.*\d*) .*')
                        self.env['area'] = CleanDecimal(leading_number)(tag)
Example #4
0
    def find_account(self, acclabel, accowner):
        """Look up the identifiers of the account matching a label (and owner).

        Returns False when the page shows the "indisponible pour le moment"
        notice, a list ``[indiceCompte, idRacine, <preceding cell text>]`` for
        the first matching link, or None when no link matches.
        """
        # first name and last name may not be ordered the same way on market site...
        wanted_owner = sorted(accowner.lower().split())

        # Check if history is present
        notice = self.doc.xpath('//body/p[contains(text(), "indisponible pour le moment")]')
        if CleanText(default=None).filter(notice):
            return False

        ids_pattern = r'indiceCompte[^\d]+(\d+).*idRacine[^\d]+(\d+)'

        # First pass: links carrying the identifiers in their onclick handler;
        # both the label and the owner must match.
        for link in self.doc.xpath('//a[contains(@onclick, "indiceCompte")]'):
            self.logger.debug("get investment from onclick")

            link_label = CleanText('.')(link)
            owner_text = CleanText('./ancestor::tr/preceding-sibling::tr[@class="LnMnTiers"][1]')(link)
            link_owner = sorted(owner_text.lower().split())

            if link_label != acclabel or link_owner != wanted_owner:
                continue

            ids = list(re.search(ids_pattern, Attr('.', 'onclick')(link)).groups())
            ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
            self.logger.debug("assign value to ids: {}".format(ids))
            return ids

        # Second pass: links carrying the identifiers in their href; only the
        # label is compared here.
        for link in self.doc.xpath('//a[contains(@href, "indiceCompte")]'):
            self.logger.debug("get investment from href")
            if CleanText('.')(link) != acclabel:
                continue

            ids = list(re.search(ids_pattern, Attr('.', 'href')(link)).groups())
            ids.append(CleanText('./ancestor::td/preceding-sibling::td')(link))
            self.logger.debug("assign value to ids: {}".format(ids))
            return ids

        return None
Example #5
0
            def parse(self, el):
                """Populate env with 'category', 'id', 'iban', 'bank_name' and 'label'.

                The category is u'Interne' when the element's ``value``
                attribute starts with 'I', u'Externe' otherwise.

                Raises SkipItem for an external recipient whose text starts
                with '- ' (no label). The ``assert`` statements raise
                AssertionError when an internal id does not match exactly one
                known account, or when the text does not match the expected
                layout.
                """
                self.env['category'] = u'Interne' if Attr('.', 'value')(self)[0] == 'I' else u'Externe'
                if self.env['category'] == u'Interne':
                    # TODO use after 'I'?
                    _id = Regexp(CleanText('.'), r'- (\w+\d\w+)')(self) # at least one digit
                    # Internal recipients are resolved against the accounts and
                    # loans returned by the browser; exactly one must contain the id.
                    accounts = list(self.page.browser.get_accounts_list()) + list(self.page.browser.get_loans_list())
                    match = [acc for acc in accounts if _id in acc.id]
                    assert len(match) == 1
                    match = match[0]
                    self.env['id'] = match.id
                    self.env['iban'] = match.iban
                    self.env['bank_name'] = u"Caisse d'Épargne"
                    self.env['label'] = match.label
                # Usual case
                elif Attr('.', 'value')(self)[1] == '-':
                    full = CleanText('.')(self)
                    if full.startswith('- '):
                        self.logger.warning('skipping recipient without a label: %r', full)
                        raise SkipItem()

                    # <recipient name> - <account number or iban> - <bank name (optional)> <optional last dash>
                    mtc = re.match('(?P<label>.+) - (?P<id>[^-]+) -(?P<bank> [^-]*)?-?$', full)
                    assert mtc
                    self.env['id'] = self.env['iban'] = mtc.group('id')
                    # The bank group is optional: fall back to NotAvailable when it is missing or blank.
                    self.env['bank_name'] = (mtc.group('bank') and mtc.group('bank').strip()) or NotAvailable
                    self.env['label'] = mtc.group('label')
                # Corner case: the text is laid out as "<id> - <label> - ...",
                # i.e. the id comes first; no iban or bank name is extracted.
                else:
                    mtc = re.match('(?P<id>.+) - (?P<label>[^-]+) -( [^-]*)?-?$', CleanText('.')(self))
                    assert mtc
                    self.env['id'] = mtc.group('id')
                    self.env['iban'] = NotAvailable
                    self.env['bank_name'] = NotAvailable
                    self.env['label'] = mtc.group('label')
Example #6
0
        def parse(self, el):
            """Fill env 'rooms', 'bedrooms' and 'area' from the item-tags entries.

            The three keys start as NotAvailable. Entries that are not a
            bedroom count, a room count or an area are ignored.
            """
            rooms_bedrooms_area = el.xpath(
                './/ul[has-class("item-tags")]/li'
            )
            self.env['rooms'] = NotAvailable
            self.env['bedrooms'] = NotAvailable
            self.env['area'] = NotAvailable

            for item in rooms_bedrooms_area:
                text = CleanText('.')(item)
                lowered = text.lower()
                if 'chambre' in lowered:
                    self.env['bedrooms'] = CleanDecimal('./strong')(item)
                elif 'pièce' in lowered:
                    self.env['rooms'] = CleanDecimal('./strong')(item)
                elif ' m²' in text and 'le m²' not in text:
                    # Area entry: keep the leading number of the text.
                    self.env['area'] = CleanDecimal(
                        Regexp(
                            CleanText(
                                '.'
                            ),
                            r'(\d*\.*\d*) .*'
                        )
                    )(item)
                # Any other entry (e.g. one containing 'le m²') is skipped.
                # Assigning only inside the matched branches avoids storing an
                # unbound or stale value under the raw text as key.
Example #7
0
 def obj_id(self):
     """Build the 'lid|aid|sid' identifier from the link in the second cell.

     The three parts are path segments 2, 3 and 4 of the href, once the
     '.html' suffix has been removed.
     """
     link = CleanText('./td[2]/a/@href', default=NotAvailable)(self)
     segments = link.replace('.html', '').split('/')
     return '%s|%s|%s' % (segments[2], segments[3], segments[4])
Example #8
0
 def obj_size(self):
     """Return the size in bytes parsed from the files header.

     Only the text after the last comma is used: its letters are stripped
     to get the number, and its digits, dots and spaces are stripped to get
     the unit.
     """
     header = CleanText('//div[has-class("files")]/../h5')(self)
     tail = header.split(',')[-1].replace(')', '')
     number = float(re.sub(r'[A-Za-z]', '', tail))
     unit = re.sub(r'[.0-9 ]', '', tail).upper()
     return get_bytes_size(number, unit)
Example #9
0
 def obj_size(self):
     """Return the size in bytes parsed from the first 'infosficher' span.

     Commas are read as decimal points; the first token is the number and
     the last token is the unit, with 'O' turned into 'B'.
     """
     text = CleanText('(//div[@id="infosficher"]/span)[1]')(self).replace(',', '.').strip()
     tokens = text.split()
     number = float(tokens[0])
     unit = tokens[-1].upper().replace('O', 'B')
     return get_bytes_size(number, unit)
Example #10
0
 def obj_size(self):
     """Return the size in bytes parsed from the second cell of the row.

     Commas are read as decimal points; the first token is the number and
     the last token is the unit.
     """
     tokens = CleanText('./td[2]')(self).replace(',', '.').split()
     number = float(tokens[0])
     unit = tokens[-1].upper()
     return get_bytes_size(number, unit)
Example #11
0
 def obj_size(self):
     """Return the size in bytes parsed from the 'poid' div.

     Commas are read as decimal points; the first token is the number and
     the last token is the unit, with 'O' turned into 'B'.
     """
     text = CleanText('./div[has-class("poid")]')(self).replace(',', '.').strip()
     tokens = text.split()
     number = float(tokens[0])
     unit = tokens[-1].upper().replace('O', 'B')
     return get_bytes_size(number, unit)
Example #12
0
        def obj_details(self):
            """Return a dict of details: publication date, agency fees, criteria.

            'creationDate' is always set; 'Honoraires' only when the agency
            fees label is present; then one entry per description criterion.
            """
            published = Date(
                Regexp(
                    CleanText(
                        '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
                    ),
                    u'.*Mis en ligne : (\d{2}/\d{2}/\d{4}).*'
                ),
                dayfirst=True
            )(self)
            details = {"creationDate": published}

            fees = CleanText(
                '//div[has-class("offer-price")]/span[has-class("lbl-agencyfees")]',
                default=None
            )(self)
            if fees:
                # Keep what follows the first colon of the fees label.
                amount = fees.split(":")[1].strip()
                details["Honoraires"] = "{} (TTC, en sus)".format(amount)

            for entry in XPath('//ul[@itemprop="description"]/li')(self):
                key = CleanText('./div[has-class("criteria-label")]')(entry)
                details[key] = CleanText('./div[has-class("criteria-value")]')(entry)

            return details
Example #13
0
 def obj_GES(self):
     """Return the ENERGY_CLASS member for the greenhouse summary letter.

     Returns NotAvailable when the summary is missing, holds nothing besides
     the "GES" caption, or its letter is not an ENERGY_CLASS attribute.
     """
     summary = CleanText(
         '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("greenhouse-summary")]',
         default=""
     )(self)
     # Slicing (rather than indexing) keeps an empty remainder from raising
     # IndexError when only the caption is present.
     letter = summary.replace("GES", "").strip()[:1]
     if not letter:
         return NotAvailable
     return getattr(ENERGY_CLASS, letter, NotAvailable)
Example #14
0
 def obj_size(self):
     """Return the size in bytes parsed from the folder span.

     The size is the text after the last ': ' and before the next ')';
     commas are read as decimal points, the first token is the number and
     the last token is the unit.
     """
     folder_text = CleanText('//span[has-class("folder") or has-class("folderopen")]')(self)
     size_text = folder_text.split(': ')[-1].split(')')[0].strip().replace(',', '.')
     tokens = size_text.split()
     number = float(tokens[0])
     unit = tokens[-1].upper()
     return get_bytes_size(number, unit)
Example #15
0
 def obj_nb_person(self):
     """Return a one-element list holding the cleaned subtitle text.

     Leading '/' characters and every "pour" are removed from the subtitle.
     """
     subtitle = CleanText(
         '//span[@class="bu_cuisine_title_3 bu_cuisine_title_3--subtitle"]'
     )(self)
     servings = subtitle.lstrip('/').replace("pour", "").strip()
     return [servings]
Example #16
0
 def obj_DPE(self):
     """Return the ENERGY_CLASS member for the energy summary letter.

     Returns NotAvailable when the summary is missing, holds nothing besides
     the "DPE" caption, or its letter is not an ENERGY_CLASS attribute.
     """
     summary = CleanText(
         '//div[has-class("offer-energy-greenhouseeffect-summary")]//div[has-class("energy-summary")]',
         default=""
     )(self)
     # Slicing (rather than indexing) keeps an empty remainder from raising
     # IndexError when only the caption is present.
     letter = summary.replace("DPE", "").strip()[:1]
     if not letter:
         return NotAvailable
     return getattr(ENERGY_CLASS, letter, NotAvailable)
Example #17
0
            def obj_label(self):
                """Return the best label found in the 'label' table cell.

                Preference order: the <noscript> text when non-empty, then the
                text following the <br> when its first word equals the first
                word of the whole cell, otherwise the whole cell text.
                """
                whole_text = CleanText(TableCell('label'))(self)
                cell = TableCell('label')(self)[0]

                after_br = CleanText(cell.xpath('./br/following-sibling::text()'))(self)
                if not after_br or after_br.split()[0] != whole_text.split()[0]:
                    after_br = whole_text

                noscript_text = CleanText(cell.xpath('./noscript'))(self)
                return noscript_text or after_br
Example #18
0
 def obj_utilities(self):
     """Return UTILITIES.INCLUDED when the price text mentions included charges.

     Otherwise UTILITIES.EXCLUDED is returned.
     """
     price_text = CleanText('//p[has-class("OfferTop-price")]')(self)
     included = "charges comprises" in price_text.lower()
     return UTILITIES.INCLUDED if included else UTILITIES.EXCLUDED
Example #19
0
 def obj_utilities(self):
     """Return UTILITIES.INCLUDED when the teaser price mentions included charges.

     Otherwise UTILITIES.EXCLUDED is returned.
     """
     price_text = CleanText('.//strong[has-class("TeaserOffer-price-num")]')(self)
     included = "charges comprises" in price_text.lower()
     return UTILITIES.INCLUDED if included else UTILITIES.EXCLUDED
Example #20
0
    def on_load(self):
        """Raise BrowserUnavailable or LoggedOut depending on the page state.

        BrowserUnavailable is raised when the first <h1> starts with the
        unavailability sentence, using the first <h4> text (or the <h1> text
        when that is empty) as message. Otherwise LoggedOut is raised unless
        the browser's ``no_login`` flag is set.
        """
        heading = CleanText('//h1[1]')(self.doc)

        if heading.startswith('Le service est moment'):
            raise BrowserUnavailable(CleanText('//h4[1]')(self.doc) or heading)

        if self.browser.no_login:
            return
        raise LoggedOut()
Example #21
0
 def obj_details(self):
     """Return ``{"fees": ...}`` from the price-fees span, or NotLoaded.

     The fees value is the text following the first colon of the span.
     """
     fees_text = CleanText('.//span[@class="price-fees"]', default=None)(self)
     if not fees_text:
         return NotLoaded
     return {"fees": fees_text.split(":")[1].strip()}
Example #22
0
 def next_page(self):
     """Return the URL of the next result page, or None when there is none.

     The total number of pages is asked from the browser with the query
     string of the search request stored in the page. The current page
     number is read from the ``page=`` parameter of the current URL.
     """
     js_datas = CleanText('//div[@id="js-data"]/@data-rest-search-request')(self)
     total_page = self.page.browser.get_total_page(js_datas.split("?")[-1])

     url = self.page.url
     # ``\d+`` (not ``\d?``) so that multi-digit page numbers are captured
     # whole and an empty capture can never reach int().
     m = re.match(r".*page=(\d+)(?:&.*)?", url)
     if not m:
         return None

     next_page = int(m.group(1)) + 1
     if next_page > total_page:
         return None

     # Rewrite only the matched digits, so that no other "page=N" substring
     # of the URL is touched.
     return url[:m.start(1)] + '%d' % next_page + url[m.end(1):]
Example #23
0
        def obj_url(self):
            """Return the job URL from the og:url meta tag or the _JobUrl JS variable.

            A value that does not start with 'http' is prefixed with the site
            host.
            """
            job_url = CleanText('//meta[@property="og:url"]/@content', default=None)(self)
            if job_url is None:
                job_url = JSVar(CleanText('//script'), var='_JobUrl')(self)

            if job_url.startswith('http'):
                return job_url

            # NOTE(review): the prefix carries no scheme, so the result is not
            # an absolute URL - confirm whether callers expect 'https://' here.
            return 'www.adecco.fr%s' % job_url
Example #24
0
 def get_author(self):
     """Return the author text without its leading 'Par ' prefix.

     Returns an empty string when an AttributeError is raised while reading
     the author element.
     """
     try:
         text = CleanText('.')(self.get_element_author())
         if not text.startswith('Par '):
             return text
         return text.split('Par ', 1)[1]
     except AttributeError:
         return ''
Example #25
0
 def load_product_codes(self):
     """Return a dict mapping upper-cased product labels to their ISIN code.

     Rows are the <tr> elements of each table body whose <th> has a scope
     containing "row"; the label is the first <th> and the code the first
     <td>. Rows missing either value are skipped.
     """
     # store ISIN codes in a dictionary with a (label: isin) fashion
     product_codes = {}
     for table in self.doc.xpath('//table/tbody'):
         # './/' keeps the search inside this table body; a leading '//'
         # would rescan the whole document for every table.
         for row in table.xpath('.//tr[contains(./th/@scope,"row")]'):
             label = CleanText('./th[1]', default=None)(row)
             isin_code = CleanText('./td[1]', default=None)(row)
             if label and isin_code:
                 product_codes[label.upper()] = isin_code
     return product_codes
Example #26
0
    def check_errors(self):
        """Raise AddRecipientBankError when the page triggers a non-eligible popin.

        The error message is the first sentence of the matching popin text.
        """
        # check if user can add new recipient
        for popin_id in ('popinClientNonEligible', 'popinClientNonEligibleBis'):
            trigger = '//script[contains(text(), "showDivJQInfo(\'%s\')")]' % popin_id
            if not self.doc.xpath(trigger):
                continue

            # Only the first sentence is kept: the full message is too long
            # and contains unnecessary recommendations.
            full_message = CleanText('//div[@id="%s"]//p' % popin_id)(self.doc)
            raise AddRecipientBankError(message=full_message.split('.')[0])
Example #27
0
 def obj_photos(self):
     """Return a one-element list with the default image, or NotLoaded.

     The data-src value is URL-unquoted. When it embeds another "http://"
     URL after its first three characters, only that embedded URL is kept,
     cut before the last '?' if there is one.
     """
     raw = CleanText('./div[has-class("default-img")]/img/@data-src')(self)
     if not raw:
         return NotLoaded

     photo_url = unquote(raw)
     if "http://" in photo_url[3:]:
         start = photo_url.find("http://", 3)
         query_at = photo_url.rfind("?")
         end = None if query_at == -1 else query_at
         photo_url = photo_url[start:end]
     return [HousingPhoto(photo_url)]
Example #28
0
            def obj_type(self):
                """Return the account type whose key prefixes the type (or label) text.

                The 'type' column is matched against ``self.TYPE``; when that
                column does not exist, the label field is matched against
                ``self.TYPE_BY_LABELS``. Account.TYPE_UNKNOWN is returned when
                no key matches.
                """
                try:
                    candidate = CleanText(TableCell('type'))(self)
                    mapping = self.TYPE
                except ColumnNotFound:
                    candidate = Field('label')(self)
                    mapping = self.TYPE_BY_LABELS

                return next(
                    (account_type for prefix, account_type in mapping.items()
                     if candidate.startswith(prefix)),
                    Account.TYPE_UNKNOWN
                )
Example #29
0
 def on_load(self):
     """Dismiss the chequebook notice, or raise ActionNeeded with the page message.

     When the form heading is the chequebook acknowledgement, the 'Alert'
     form is submitted with the answer 'Non'. Otherwise ActionNeeded is
     raised with the error paragraph if present, else with the form heading.
     """
     if self.doc.xpath(u'//form//h1[1][contains(text(), "Accusé de reception du chéquier")]'):
         alert_form = self.get_form(name='Alert')
         alert_form['command'] = 'validateAlertMessage'
         alert_form['radioValide_1_2_40003039944'] = 'Non'
         alert_form.submit()
         return

     if self.doc.xpath(u'//p[@class="cddErrorMessage"]'):
         message = CleanText(u'//p[@class="cddErrorMessage"]')(self.doc)
         # TODO python2 handles unicode exceptions badly, fix when passing to python3
         raise ActionNeeded(message.encode('ascii', 'replace'))

     raise ActionNeeded(CleanText(u'//form//h1[1]')(self.doc))
Example #30
0
 def obj_details(self):
     """Return a dict with at most one entry built from the bold note and the DPE.

     The note is split on ':'; its first part is the key. With a value part
     the entry is "<value> (<dpe letter>)"; without one, the entry is the
     DPE letter alone, provided a letter was found in the pyramid image URL.
     """
     details = {}
     a = CleanText('//div[@class="box box-noborder"]/p[@class="size_13 darkergrey bold"]')(self)
     if a:
         splitted_a = a.split(':')
         dpe = Regexp(CleanText('//div[@id="energy-pyramid"]/img/@src'),
                      r'http://mmf.logic-immo.com/mmf/fr/static/dpe/dpe_(\w)_b.gif',
                      default="")(self)
         if len(splitted_a) > 1:
             details[splitted_a[0]] = '%s (%s)' % (splitted_a[1], dpe)
         elif dpe:
             # Store the DPE letter itself, not the bare '%s' placeholder.
             details[splitted_a[0]] = '%s' % dpe
     return details
Example #31
0
 def decode_paste(self, key):
     """Decrypt and inflate the paste stored in the 'cipherdata' div.

     The div holds a JSON list whose first item has a 'data' field, itself
     JSON. That inner object is decrypted with ``key``, base64-decoded and
     decompressed with negative window bits.
     """
     cipher_text = CleanText('//div[@id="cipherdata"]')(self.doc)
     envelope = json.loads(cipher_text)
     payload = json.loads(envelope[0]['data'])
     decrypted = decrypt(key, payload)
     return decompress(b64decode(decrypted), -MAX_WBITS)
Example #32
0
        class item(ItemElement):
            # Builds one Subscription per element from its dict entries.
            klass = Subscription

            # Label and id are both read from the 'nichandle' entry.
            obj_label = CleanText(Dict('nichandle'))
            # Subscriber is "<firstname> <name>".
            obj_subscriber = Format("%s %s", CleanText(Dict('firstname')), CleanText(Dict('name')))
            obj_id = CleanText(Dict('nichandle'))
Example #33
0
 def get_expire(self):
     """Return the paste expiry as a datetime, or None when none is recorded.

     The value is read from the 'expire_date' key of the 'meta' object of
     the first item of the JSON stored in the 'cipherdata' div.
     """
     meta = json.loads(CleanText('//div[@id="cipherdata"]')(self.doc))[0]['meta']
     if 'expire_date' not in meta:
         return None
     return datetime.fromtimestamp(meta['expire_date'])
Example #34
0
 def on_load(self):
     """Raise ActionNeeded when the page shows the terminated-membership notice."""
     notice_xpath = u'//td[contains(text(), "Votre adhésion au service WEB est résiliée depuis le")]'
     notice = CleanText(notice_xpath)(self.doc)
     if not notice:
         return
     raise ActionNeeded(notice)
Example #35
0
 def get_last_id(self):
     """Return the data-tweet-id of the last tweet container on the page."""
     tweet_divs = self.page.doc.xpath('//*[@data-item-type="tweet"]/div')
     return CleanText('./@data-tweet-id')(tweet_divs[-1])
Example #36
0
        class item(ItemElement):
            # Builds one Event per element; each field is read from the cell
            # whose 'headers' attribute names the column.
            klass = Event

            obj_date = Date(CleanText('td[@headers="Date"]'))
            obj_activity = CleanText('td[@headers="Libelle"]')
            obj_location = CleanText('td[@headers="site"]')
Example #37
0
 def get_api_key(self):
     """Return the 'apiKey' JavaScript variable found in the page scripts, or None."""
     api_key_filter = JSVar(CleanText('//script'), var='apiKey', default=None)
     return api_key_filter(self.doc)
Example #38
0
 def obj_id(self):
     """Return '<product id>.<element id>' for this item."""
     parent_product = Field('product')(self)
     element_id = CleanText('./@id')(self)
     return u"%s.%s" % (parent_product.id, element_id)
Example #39
0
 def get_trends_token(self):
     """Return the 'trendsCacheKey' entry of the JSON stored in the init-data input."""
     init_data = json.loads(CleanText('//input[@id="init-data"]/@value')(self.doc))
     return init_data['trendsCacheKey']
Example #40
0
 def get_message(self):
     """Return the cleaned text of the <h1> and <p> children of the 'div_text' div."""
     message_filter = CleanText('//div[@id="div_text"]/h1 | //div[@id="div_text"]/p')
     return message_filter(self.doc)
Example #41
0
 def obj_shop(self):
     """Return a Shop keyed by this item's id, named and located from the row cells.

     The name comes from the fourth cell and the location from the third.
     """
     shop = Shop(Field('id')(self))
     shop.name = CleanText('(./td)[4]')(self)
     shop.location = CleanText('(./td)[3]')(self)
     return shop
Example #42
0
 def obj_ingredients(self):
     """Return the dash-separated ingredients, without the part before the first dash.

     Returns None when the paragraph is missing or contains no dash.
     """
     paragraph = CleanText(
         '//p[@class="m_content_recette_ingredients"]',
         default='')(self)
     parts = paragraph.split('-')
     if len(parts) <= 1:
         return None
     return parts[1:]
Example #43
0
 def obj_nb_person(self):
     """Return ``[count]`` parsed from "(pour N personnes)", or NotAvailable."""
     servings_source = CleanText('//p[@class="m_content_recette_ingredients"]/span[1]')
     count = Regexp(servings_source,
                    '.*\(pour (\d+) personnes\)',
                    default=0)(self)
     if not count:
         return NotAvailable
     return [count]
Example #44
0
 def next_page(self):
     """Return the href of the "next page" pagination link, or None when absent."""
     next_link = (
         '//a[@id="ctl00_cphMainContent_m_ctrlSearchEngine_m_ctrlSearchListDisplay_m_ctrlSearchPagination_m_linkNextPage"]/@href'
     )
     return CleanText(next_link, default=None)(self)
Example #45
0
 def obj_currency(self):
     """Return the currency detected by Currency.get_currency in the location block text."""
     location_text = CleanText('./article/div/div[@itemprop="location"]')(self)
     return Currency.get_currency(location_text)
Example #46
0
 def condition(self):
     """Return the element's id attribute text, or False when it has none."""
     element_id = CleanText('./@id', default=False)(self)
     return element_id
Example #47
0
 def get_error(self):
     """Return the cleaned text of the page's 'error' div."""
     error_filter = CleanText("//div[@class='error']")
     return error_filter(self.doc)
Example #48
0
 def obj_duration(self):
     """Return the data-duration attribute, read as seconds, as a timedelta."""
     seconds = int(CleanText('./div/div/a/@data-duration')(self))
     return timedelta(seconds=seconds)
Example #49
0
 def obj_date(self):
     """Return the current datetime with hour and minute set to the refresh time.

     The refresh time is read from the 'refresh_time' span and split on
     ':'; the first two parts are the hour and the minute.
     """
     refresh_text = CleanText(u'//span[@id="refresh_time"]')(self)
     parts = [int(t) for t in refresh_text.split(":")]
     # datetime objects are immutable: replace() returns a new instance, so
     # its result must be the value returned.
     return datetime.datetime.now().replace(hour=parts[0], minute=parts[1])
Example #50
0
 def obj_thumbnail(self):
     """Return a Thumbnail for the first image URL, with its url set to its id."""
     image_url = CleanText('//image[1]/url')(self)
     thumb = Thumbnail(image_url)
     thumb.url = thumb.id
     return thumb
Example #51
0
 def get_iban(self):
     """Return the IBAN cell text with the 'IBAN' caption and all spaces removed."""
     iban_filter = CleanText(
         '//td[contains(text(), "IBAN") and @class="ColonneCode"]',
         replace=[('IBAN', ''), (' ', '')])
     return iban_filter(self.doc)
Example #52
0
 def obj_duration(self):
     """Return the Duration parsed from the first itunes:duration child element."""
     itunes_ns = {'itunes': 'http://www.itunes.com/dtds/podcast-1.0.dtd'}
     nodes = self.el.xpath('itunes:duration', namespaces=itunes_ns)
     return Duration(CleanText('.'))(nodes[0])
Example #53
0
 def get_france_culture_podcasts_url(self):
     """Return the identifier captured from the first matching podcast link.

     Returns None when no 'podcast' link has an href matching the RSS
     pattern.
     """
     for link in XPath('//a[@class="podcast"]')(self.doc):
         emission_id = Regexp(CleanText('./@href'),
                              'http://radiofrance-podcast.net/podcast09/rss_(.*).xml', default=None)(link)
         if not emission_id:
             continue
         return emission_id
     return None
Example #54
0
 def get_product_name(self):
     """Return the text of the sixth header cell of the result table, or ''."""
     header_filter = CleanText('(//table[@id="tab_resultat"]/tr/th)[6]', default='')
     return header_filter(self.doc)
Example #55
0
 def get_url(self):
     """Return the live URL from the page scripts, or the player link href as fallback."""
     live_url = Regexp(CleanText('//script'), '.*liveUrl: \'(.*)\', timeshiftUrl.*', default=None)(self.doc)
     if live_url:
         return live_url
     return CleanText('//a[@id="player"][1]/@href')(self.doc)
Example #56
0
        class item(ItemElement):
            # Builds one Product per element: the id is the value of its
            # <input> child and the name is the text of its <label> child.
            klass = Product

            obj_id = CleanText('./input/@value')
            obj_name = CleanText('./label')
Example #57
0
 def get_token(self):
     """Return the value of the search form's token input."""
     token_filter = CleanText('//input[@id="recherche_recherchertype__token"]/@value')
     return token_filter(self.doc)
Example #58
0
 def has_paste(self):
     """Return True when the 'cipherdata' div holds non-empty text."""
     cipher_text = CleanText('//div[@id="cipherdata"]')(self.doc)
     return bool(cipher_text)
Example #59
0
 def get_min_position(self):
     """Return the data-min-position attribute of the stream container."""
     position_filter = CleanText('//div[@class="stream-container "]/@data-min-position')
     return position_filter(self.doc)
Example #60
0
    def check_double_auth(self):
        """Raise ActionNeeded when the page contains the SMS code input.

        The message is the text of the fifth div mentioning "Two-Factor".
        """
        if not self.doc.xpath('//input[@id="codeSMS"]'):
            return

        raise ActionNeeded(CleanText('(//div[contains(., "Two-Factor")])[5]')(self.doc))