def get_housing(self, housing=None):
    """Fill a ``Housing`` from the ``<detailannonce>`` element of the document.

    :param housing: object to complete; when ``None`` a new ``Housing`` is
                    created from ``self.groups[0]``.
    :return: the filled housing, or ``None`` when the listing has no
             ``<titre>`` element.
    """
    if housing is None:
        housing = Housing(self.groups[0])

    details = self.document.getroot().xpath('//detailannonce')[0]
    if details.find('titre') is None:
        return None

    housing.title = unicode(details.find('titre').text)
    housing.text = details.find('descriptif').text.strip()
    housing.cost = Decimal(details.find('prix').text)
    housing.currency = u'€'
    housing.date = parse_date(details.find('dtfraicheur').text)
    housing.area = Decimal(details.find('surface').text)
    housing.phone = unicode(details.find('contact').find('telephone').text)

    # findtext() gives None for a missing element and '' for an empty one,
    # so both cases map to NotAvailable.  Calling unicode() on the ``.text``
    # of an empty element would instead store the literal string u'None'.
    station = details.findtext('proximite')
    housing.station = unicode(station) if station else NotAvailable

    # Prefer the address, fall back to the district; either element may be
    # absent or empty.
    location = details.findtext('adresse') or details.findtext('quartier')
    housing.location = unicode(location) if location else NotAvailable

    housing.photos = []
    for photo in details.xpath('./photos/photo'):
        # Use the big picture when there is one, else the standard one;
        # skip entries that carry no URL at all.
        url = photo.findtext('bigurl') or photo.findtext('stdurl')
        if url:
            housing.photos.append(HousingPhoto(unicode(url)))

    housing.details = {}
    for detail in details.xpath('./details/detail'):
        housing.details[detail.find('libelle').text.strip()] = detail.find('valeur').text or 'N/A'
    housing.details['Reference'] = details.find('reference').text

    return housing
def get_housing(self):
    """Build a ``Housing`` from the ``div#annonce_detail`` block of the page.

    The identifier is the last ``-``-separated piece of ``self.url``; the
    title and the price are the first two ``' - '``-separated pieces of the
    ``<h1>`` text.

    :return: the filled ``Housing``; ``details`` and ``photos`` are always
             ``NotAvailable``.
    """
    detail = self.parser.select(self.document.getroot(), 'div#annonce_detail', 1)
    housing = Housing(self.url.split('-')[-1])

    heading = detail.find('h1').text.split(' - ')
    housing.title = heading[0].strip()
    # Trim currency sign / whitespace, drop every '.', then turn ',' into '.'
    # (presumably French number formatting -- confirm against the site).
    raw_cost = heading[1].strip(u' \t\u20ac\xa0€\n\r')
    housing.cost = Decimal(raw_cost.replace('.', '').replace(',', '.'))
    housing.currency = u'€'

    # The third group of the title pattern is the surface in square metres.
    area_match = re.match('(\w+) (.+) (\d+)\xa0m\xb2 (.*)', housing.title)
    if area_match:
        housing.area = Decimal(area_match.group(3))

    # Defaults, overridden below when the page provides the information.
    housing.date = NotAvailable
    housing.station = NotAvailable
    housing.location = NotAvailable
    housing.phone = NotAvailable

    stations = detail.cssselect('p.metro')
    if stations:
        housing.station = stations[0].text.strip()

    paragraph = detail.cssselect('p.annonce-detail-texte')[0]
    bolds = paragraph.findall('b')
    if not bolds:
        housing.text = paragraph.text.strip()
    else:
        # First <b> holds the location and is followed by the description;
        # a second <b>, when present, holds the phone number.
        first = bolds[0]
        housing.text = first.tail.strip()
        housing.location = unicode(first.text)
        if len(bolds) > 1:
            housing.phone = bolds[1].text

    housing.details = NotAvailable
    housing.photos = NotAvailable
    return housing
def iter_housings(self):
    """Yield one ``Housing`` per ``<annonce>`` element of the document.

    ``station`` is ``NotAvailable`` when ``<proximite>`` is missing or
    empty; photos without a ``<stdurl>`` value are skipped.
    """
    for ad in self.document.getroot().xpath('//annonce'):
        housing = Housing(ad.find('idannonce').text)
        housing.title = unicode(ad.find('titre').text)
        housing.date = parse_date(ad.find('dtfraicheur').text)
        housing.cost = Decimal(ad.find('prix').text)
        housing.currency = u'€'
        housing.area = Decimal(ad.find('surface').text)
        housing.text = unicode(ad.find('descriptif').text.strip())
        housing.location = unicode(ad.find('ville').text)

        # findtext() gives None for a missing element and '' for an empty
        # one; unicode() on the ``.text`` of an empty element would store
        # the literal string u'None' instead.
        station = ad.findtext('proximite')
        housing.station = unicode(station) if station else NotAvailable

        housing.photos = []
        for photo in ad.xpath('./photos/photo'):
            url = photo.findtext('stdurl')
            if url:
                housing.photos.append(HousingPhoto(unicode(url)))

        yield housing
def iter_housings(self):
    """Yield a ``Housing`` for every ``div.annonce-resume`` that has a link.

    ``area`` is only set when the title matches the surface pattern, ``date``
    only when ``self.DATE_RE`` matches the publication paragraph, and
    ``location`` only when the summary paragraph contains a ``<b>``.
    """
    for summary in self.document.getroot().cssselect('div.annonce-resume'):
        link = summary.cssselect('td.lien-annonce')[0].find('a')
        if link is None:
            # No link: not a real listing, skip it.
            continue

        housing_id = link.attrib['href'].split('-')[-1]
        housing = Housing(housing_id)
        housing.title = link.text.strip()

        # The third group of the title pattern is the surface in square metres.
        area_match = re.match('(\w+) (.+) (\d+)\xa0m\xb2 (.*)', housing.title)
        if area_match:
            housing.area = Decimal(area_match.group(3))

        # Trim currency sign / whitespace, drop every '.', turn ',' into '.'.
        raw_cost = summary.cssselect('td.prix')[0].text.strip(u' \t\u20ac\xa0€\n\r')
        housing.cost = Decimal(raw_cost.replace('.', '').replace(',', '.'))
        housing.currency = u'€'

        published = summary.cssselect('p.date-publication')[0].text.strip()
        date_match = self.DATE_RE.match(published)
        if date_match:
            date_text = date_match.group(1)
            # Apply every MONTHS replacement before handing the string to
            # parse_date.
            for french, english in self.MONTHS.iteritems():
                date_text = date_text.replace(french, english)
            housing.date = parse_date(date_text)

        stations = summary.cssselect('p.metro')
        if stations:
            housing.station = unicode(stations[0].text.strip())
        else:
            housing.station = NotAvailable

        paragraph = summary.cssselect('p.annonce-resume-texte')[0]
        bolds = paragraph.findall('b')
        if not bolds:
            housing.text = paragraph.text.strip()
        else:
            # First <b> holds the location and is followed by the description.
            housing.text = bolds[0].tail.strip()
            housing.location = unicode(bolds[0].text)

        housing.photos = NotAvailable
        yield housing
def get_housing(self):
    """Build a ``Housing`` from the ``div#annonce_detail`` block of the page.

    The identifier is the last ``-``-separated piece of ``self.url``; the
    title and the price are the first two ``' - '``-separated pieces of the
    ``<h1>`` text.

    :return: the filled ``Housing``; ``details`` and ``photos`` are always
             ``NotAvailable``.
    """
    detail = self.parser.select(self.document.getroot(), 'div#annonce_detail', 1)
    housing = Housing(self.url.split('-')[-1])

    heading = detail.find('h1').text.split(' - ')
    housing.title = heading[0].strip()
    # Trim currency sign / whitespace, drop every '.', then turn ',' into '.'
    # (presumably French number formatting -- confirm against the site).
    raw_cost = heading[1].strip(u' \t\u20ac\xa0€\n\r')
    housing.cost = Decimal(raw_cost.replace('.', '').replace(',', '.'))
    housing.currency = u'€'

    # The third group of the title pattern is the surface in square metres.
    area_match = re.match('(\w+) (.+) (\d+)\xa0m\xb2 (.*)', housing.title)
    if area_match:
        housing.area = Decimal(area_match.group(3))

    # Defaults, overridden below when the page provides the information.
    housing.date = NotAvailable
    housing.station = NotAvailable
    housing.location = NotAvailable
    housing.phone = NotAvailable

    stations = detail.cssselect('p.metro')
    if stations:
        housing.station = stations[0].text.strip()

    paragraph = detail.cssselect('p.annonce-detail-texte')[0]
    bolds = paragraph.findall('b')
    if not bolds:
        housing.text = paragraph.text.strip()
    else:
        # First <b> holds the location and is followed by the description;
        # a second <b>, when present, holds the phone number.
        first = bolds[0]
        housing.text = first.tail.strip()
        housing.location = unicode(first.text)
        if len(bolds) > 1:
            housing.phone = bolds[1].text

    housing.details = NotAvailable
    housing.photos = NotAvailable
    return housing
def iter_housings(self):
    """Yield a ``Housing`` for every ``div.annonce-resume`` that has a link.

    ``area`` is only set when the title matches the surface pattern, ``date``
    only when ``self.DATE_RE`` matches the publication paragraph, and
    ``location`` only when the summary paragraph contains a ``<b>``.
    """
    for summary in self.document.getroot().cssselect('div.annonce-resume'):
        link = summary.cssselect('td.lien-annonce')[0].find('a')
        if link is None:
            # No link: not a real listing, skip it.
            continue

        housing_id = link.attrib['href'].split('-')[-1]
        housing = Housing(housing_id)
        housing.title = link.text.strip()

        # The third group of the title pattern is the surface in square metres.
        area_match = re.match('(\w+) (.+) (\d+)\xa0m\xb2 (.*)', housing.title)
        if area_match:
            housing.area = Decimal(area_match.group(3))

        # Trim currency sign / whitespace, drop every '.', turn ',' into '.'.
        raw_cost = summary.cssselect('td.prix')[0].text.strip(u' \t\u20ac\xa0€\n\r')
        housing.cost = Decimal(raw_cost.replace('.', '').replace(',', '.'))
        housing.currency = u'€'

        published = summary.cssselect('p.date-publication')[0].text.strip()
        date_match = self.DATE_RE.match(published)
        if date_match:
            date_text = date_match.group(1)
            # Apply every MONTHS replacement before handing the string to
            # parse_date.
            for french, english in self.MONTHS.iteritems():
                date_text = date_text.replace(french, english)
            housing.date = parse_date(date_text)

        stations = summary.cssselect('p.metro')
        if stations:
            housing.station = unicode(stations[0].text.strip())
        else:
            housing.station = NotAvailable

        paragraph = summary.cssselect('p.annonce-resume-texte')[0]
        bolds = paragraph.findall('b')
        if not bolds:
            housing.text = paragraph.text.strip()
        else:
            # First <b> holds the location and is followed by the description.
            housing.text = bolds[0].tail.strip()
            housing.location = unicode(bolds[0].text)

        housing.photos = NotAvailable
        yield housing
def get_housing(self, housing=None):
    """Fill a ``Housing`` from the ``<detailannonce>`` element of the document.

    :param housing: object to complete; when ``None`` a new ``Housing`` is
                    created from ``self.groups[0]``.
    :return: the filled housing, or ``None`` when the listing has no
             ``<titre>`` element.
    """
    if housing is None:
        housing = Housing(self.groups[0])

    details = self.document.getroot().xpath('//detailannonce')[0]
    if details.find('titre') is None:
        return None

    housing.title = unicode(details.find('titre').text)
    housing.text = details.find('descriptif').text.strip()
    housing.cost = Decimal(details.find('prix').text)
    housing.currency = u'€'
    housing.date = parse_date(details.find('dtfraicheur').text)
    housing.area = Decimal(details.find('surface').text)
    housing.phone = unicode(details.find('contact').find('telephone').text)

    # findtext() gives None for a missing element and '' for an empty one,
    # so both cases map to NotAvailable.  Calling unicode() on the ``.text``
    # of an empty element would instead store the literal string u'None'.
    station = details.findtext('proximite')
    housing.station = unicode(station) if station else NotAvailable

    # Prefer the address, fall back to the district; either element may be
    # absent or empty.
    location = details.findtext('adresse') or details.findtext('quartier')
    housing.location = unicode(location) if location else NotAvailable

    housing.photos = []
    for photo in details.xpath('./photos/photo'):
        # Use the big picture when there is one, else the standard one;
        # skip entries that carry no URL at all.
        url = photo.findtext('bigurl') or photo.findtext('stdurl')
        if url:
            housing.photos.append(HousingPhoto(unicode(url)))

    housing.details = {}
    for detail in details.xpath('./details/detail'):
        housing.details[detail.find('libelle').text.strip()] = detail.find('valeur').text or 'N/A'
    housing.details['Reference'] = details.find('reference').text

    return housing