class get_job_advert(ItemElement):
    """Fill a ``BaseJobAdvert`` from the advert's ``modal-body`` markup.

    Most fields are declared as filters below; the contract type and the
    pay live in a ``<dl class="icon-group">`` list and are set in
    :meth:`parse`.
    """

    klass = BaseJobAdvert

    obj_id = Env('id')
    obj_url = BrowserURL('advert', id=Env('id'))

    # The title and the job name are both read from the same <h2>.
    obj_title = CleanText('//div[@class="modal-body"]/h2')
    obj_job_name = CleanText('//div[@class="modal-body"]/h2')
    obj_description = CleanText('//div[has-class("description")]/p')
    obj_society_name = CleanText('//div[@class="media-body"]/h4')

    # Bullet list taken from the first "skill-list" <ul> that follows the
    # <h4> whose text contains "Exp".
    obj_experience = Join(
        u'- ',
        '//h4[contains(text(), "Exp")]/following-sibling::ul[has-class("skill-list")][1]/li',
        newline=True,
        addBefore='\n- ')

    # Same layout as above, for the <h4> whose text contains "For".
    obj_formation = Join(
        u'- ',
        '//h4[contains(text(), "For")]/following-sibling::ul[has-class("skill-list")][1]/li',
        newline=True,
        addBefore='\n- ')

    # First and second <p> after the title: place, then publication date.
    obj_place = CleanText(
        '//div[@class="modal-body"]/h2/following-sibling::p[1]')
    obj_publication_date = PoleEmploiDate(
        CleanText('//div[@class="modal-body"]/h2/following-sibling::p[2]'))

    def parse(self, el):
        """Set ``contract_type`` and ``pay`` from the icon-group list.

        Each ``<dt>`` is matched on its text; the value is read from the
        first ``<dd>`` sibling that follows it.
        """
        for term in XPath('//dl[@class="icon-group"]/dt')(el):
            heading = CleanText('.')(term)
            if heading == u'Salaire':
                # Strip the "Salaire : " prefix repeated inside the value.
                self.obj.pay = Regexp(
                    CleanText('./following-sibling::dd[1]'),
                    u'Salaire : (.*)')(term)
            elif heading == u'Type de contrat':
                self.obj.contract_type = CleanText(
                    './following-sibling::dd[1]')(term)
class get_job_advert(ItemElement):
    """Fill a ``BaseJobAdvert`` from the ``infos-lieu`` / ``poste`` markup.

    Contract type, pay and experience are all extracted by regex from the
    text of the ``<li>`` items under ``<article id="poste">``; each falls
    back to ``NotAvailable`` when its label is not found.
    """

    klass = BaseJobAdvert

    obj_id = Env('_id')
    obj_url = BrowserURL('advert_page', _id=Env('_id'))
    obj_title = CleanText('//div[@class="infos-lieu"]/h1')
    obj_place = CleanText('//div[@class="infos-lieu"]/h2')

    # NOTE(review): the inner Regexp falls back to '' when the label is
    # missing, but Date itself is given no default -- confirm how Date
    # handles an empty string.
    obj_publication_date = Date(
        Regexp(CleanText('//div[@class="info-agency"]'),
               ".*Date de l'annonce :(.*)",
               default=''))

    # Same <h1> as the title.
    obj_job_name = CleanText('//div[@class="infos-lieu"]/h1')
    obj_description = Format(
        '\n%s%s',
        CleanHTML('//article[@id="post-description"]/div'),
        CleanHTML('//article[@id="poste"]'))

    # Raw string: "\w" is a regex class, not a Python string escape.
    obj_contract_type = Regexp(
        CleanText('//article[@id="poste"]/div/ul/li'),
        r'Contrat : (\w*)',
        default=NotAvailable)
    obj_pay = Regexp(
        CleanText('//article[@id="poste"]/div/ul/li'),
        'Salaire : (.*) par mois',
        default=NotAvailable)
    obj_experience = Regexp(
        CleanText('//article[@id="poste"]/div/ul/li'),
        u'Expérience : (.* ans)',
        default=NotAvailable)
class item(ItemElement):
    """Build a ``Bill`` from one JSON bill entry.

    The date comes from ``dueDate`` and the price from
    ``amountIncludingTax``; label, id and download URL are derived from
    those plus the ``subid`` environment value.
    """

    klass = Bill

    obj_date = Date(Dict('dueDate'),
                    parse_func=parse_french_date,
                    default=NotAvailable)
    obj_price = CleanDecimal(Dict('amountIncludingTax'))
    obj_format = 'pdf'

    def obj_label(self):
        """Return the label, 'Facture du <date>'."""
        bill_date = Field('date')(self)
        return 'Facture du %s' % bill_date

    def obj_id(self):
        """Return '<subid>_<ddmmYYYY>' built from the bill date."""
        sub_id = Env('subid')(self)
        date_stamp = Field('date')(self).strftime('%d%m%Y')
        return '%s_%s' % (sub_id, date_stamp)

    def get_params(self):
        """Return the url-encoded ``billid`` / ``billDate`` pair."""
        return urlencode({
            'billid': Dict('id')(self),
            'billDate': Dict('dueDate')(self),
        })

    # get_params must be defined above: it is referenced by name here.
    obj_url = BrowserURL('doc_api_pro',
                         subid=Env('subid'),
                         dir=Dict('documents/0/mainDir'),
                         fact_type=Dict('documents/0/subDir'),
                         billparams=get_params)
    obj__is_v2 = False
def obj_coming(self):
    """Return the total upcoming amount, excluding card operations.

    Opens the ``account_coming`` page for this account's IBAN, reads the
    cumulated total (``montant`` scaled down by ``nb_dec`` decimal
    places) and subtracts the cumulated amount of the first nature
    labelled "Factures / Retraits cartes", if there is one.
    """
    def scale(amount, decimals):
        # Amounts come as an integer-like value plus a decimal count.
        return amount / 10**decimals

    coming_url = BrowserURL('account_coming', identifiant=Field('iban'))(self)
    coming_page = self.page.browser.open(coming_url).page

    total = Eval(
        scale,
        CleanDecimal(
            Dict('infoOperationsAvenir/cumulTotal/montant', default='0')),
        CleanDecimal(
            Dict('infoOperationsAvenir/cumulTotal/nb_dec', default='0')),
    )(coming_page.doc)

    # This is so that card coming transactions aren't accounted twice in
    # the total incoming amount: only the first matching nature is used.
    card_nature = next(
        (nature
         for nature in Dict('infoOperationsAvenir/natures')(coming_page.doc)
         if Dict('nature/libelle')(nature) == "Factures / Retraits cartes"),
        None)
    if card_nature is not None:
        total -= Eval(
            scale,
            CleanDecimal(Dict('cumulNatureMere/montant', default='0')),
            CleanDecimal(Dict('cumulNatureMere/nb_dec', default='0')),
        )(card_nature)

    return total
def next_page(self):
    """Return the ``adv_search`` URL for the next page, or ``None``.

    The page number is read from the ``pg=`` parameter of the
    ``<link rel="next">`` href. When there is no such link, or no digits
    follow ``pg=``, nothing is returned instead of failing on
    ``int(None)`` / ``int('')``.
    """
    page = Regexp(CleanText('//link[@rel="next"]/@href', default=''),
                  r'.*pg=(\d*)',
                  default=None)(self)
    # Regexp yields None when the href is missing or has no "pg=", and
    # may yield '' since "\d*" also matches zero digits.
    if page:
        return BrowserURL('adv_search',
                          search=Env('search'),
                          page=int(page))(self)
class get_housing(ItemElement):
    """Fill a ``Housing`` from an offer page.

    Several XPaths are written as ``a|b`` unions so that either of two
    markup variants is matched.
    """

    klass = Housing

    obj_id = Env('_id')
    obj_title = CleanText(CleanHTML('//meta[@itemprop="name"]/@content'))

    # Area is the number placed right before " m²" in the title.
    # "\\d" keeps the regex class without an invalid string escape, while
    # "\xb2" stays a real character escape, so the pattern is unchanged.
    obj_area = CleanDecimal(
        Regexp(CleanText(CleanHTML('//meta[@itemprop="name"]/@content')),
               '(.*?)(\\d*) m\xb2(.*?)',
               '\\2'),
        default=NotAvailable)
    obj_cost = CleanDecimal('//*[@itemprop="price"]')

    # Currency symbol found in the price text; falls back to the euro sign.
    obj_currency = Regexp(CleanText('//*[@itemprop="price"]'),
                          '.*([%s%s%s])' % (u'€', u'$', u'£'),
                          default=u'€')
    obj_date = Date(
        Regexp(
            CleanText(
                '//p[@class="offer-description-notes"]|//p[has-class("darkergrey")]'
            ),
            u'.* Mis à jour : (\\d{2}/\\d{2}/\\d{4}).*'))
    obj_text = CleanHTML(
        '//div[@class="offer-description-text"]|//div[has-class("offer-description")]'
    )
    obj_location = CleanText('//*[@itemprop="address"]')
    obj_url = BrowserURL('housing', _id=Env('_id'))

    def obj_photos(self):
        """Return one ``HousingPhoto`` per carousel image ``src``."""
        return [
            HousingPhoto(u'%s' % img)
            for img in XPath(
                '//div[@class="carousel-content"]/ul/li/a/img/@src|//div[@class="carousel"]/ul/li/a/img/@src'
            )(self)
        ]

    def obj_details(self):
        """Return a dict of detail label -> value.

        The energy and greenhouse summaries are added only when both
        their label and their value are non-empty; every ``<li>`` of the
        description list is added unconditionally.
        """
        details = {}

        energy = CleanText(
            '//div[has-class("energy-summary")]/span[@class="section-label"]|//div[has-class("energy-summary")]/div/span[@class="section-label"]',
            default='')(self)
        energy_value = CleanText(
            '//div[has-class("energy-summary")]/span[@class="energy-msg"]',
            default='')(self)
        if energy and energy_value:
            details[energy] = energy_value

        greenhouse = CleanText(
            '//div[has-class("greenhouse-summary")]/span[@class="section-label"]|//div[has-class("greenhouse-summary")]/div/span[@class="section-label"]',
            default='')(self)
        greenhouse_value = CleanText(
            '//div[has-class("greenhouse-summary")]/span[@class="energy-msg"]',
            default='')(self)
        if greenhouse and greenhouse_value:
            details[greenhouse] = greenhouse_value

        for li in XPath('//ul[@itemprop="description"]/li')(self):
            label = CleanText('./div[has-class("criteria-label")]')(li)
            value = CleanText('./div[has-class("criteria-value")]')(li)
            details[label] = value

        return details
def next_page(self):
    """Return a POST ``requests.Request`` for the next page, or ``None``.

    When a "Page suivante" anchor with an ``href`` exists, the page's
    form is re-submitted with ``__EVENTTARGET`` set to the target found
    after ``PostBackOptions`` in that href. Nothing is returned when
    there is no such anchor.

    Raises ``AttributeError`` if the href holds no ``PostBackOptions``
    target, since ``re.search`` then returns ``None``.
    """
    link = Link('//a[@title="Page suivante" and @href]', default=None)(self)
    if link:
        form = self.page.get_form('//form')
        # Raw string: "\w" is a regex class, not a Python string escape.
        form['__EVENTTARGET'] = re.search(
            r'PostBackOptions[^\w]+([^"]+)', link).group(1)
        # The 'investment' URL is built with page=None, then the literal
        # 'None' in the result is replaced with the form's own URL.
        target_url = BrowserURL('investment', page=None)(self).replace(
            'None', form.url)
        return requests.Request("POST", target_url, data=dict(form))
def next_page(self):
    """Return the ``adv_search`` URL for the next page, or ``None``.

    The page number is read from the ``page=`` parameter of the
    ``<a data-action="fetch">`` href; nothing is returned when that
    anchor or its page number is missing.
    """
    # Raw string: "\d" is a regex class, not a Python string escape.
    page = Regexp(CleanText('//a[@data-action="fetch"]/@href', default=''),
                  r'.*page=(\d*)',
                  default=None)(self)
    if page:
        return BrowserURL('adv_search',
                          search=Env('search'),
                          page=int(page))(self)
def next_page(self):
    """Return the ``search`` URL for the next page, or ``None``.

    Pagination data is read from ``navigation/pagination``. A next page
    exists while the results covered so far
    (``page * resultsPerPage``) are fewer than ``maxResults``.

    The test is written without division so it gives the same answer
    whether ``/`` floors (Python 2) or not (Python 3); with flooring, a
    last partial page would be skipped. A non-positive page size yields
    ``None`` rather than a division error.
    """
    page_nb = int(Dict('navigation/pagination/page')(self))
    max_results = int(Dict('navigation/pagination/maxResults')(self))
    results_per_page = int(
        Dict('navigation/pagination/resultsPerPage')(self))

    # Same as max_results / results_per_page > page_nb in true division.
    if results_per_page > 0 and page_nb * results_per_page < max_results:
        return BrowserURL('search',
                          query=Env('query'),
                          page_number=page_nb + 1)(self)
def obj_coming(self):
    """Return the total upcoming amount for this account.

    Opens the ``account_coming`` page for the account's IBAN and returns
    the cumulated ``montant`` scaled down by ``nb_dec`` decimal places;
    both values default to '0' when absent.
    """
    coming_url = BrowserURL('account_coming', identifiant=Field('iban'))(self)
    coming_page = self.page.browser.open(coming_url).page

    amount = CleanDecimal(
        Dict('infoOperationsAvenir/cumulTotal/montant', default='0'))
    decimals = CleanDecimal(
        Dict('infoOperationsAvenir/cumulTotal/nb_dec', default='0'))
    scaled_total = Eval(lambda x, y: x / 10**y, amount, decimals)

    return scaled_total(coming_page.doc)