Ejemplo n.º 1
0
    def get_housing(self):
        """
        Build a Housing object from the 'div#annonce_detail' element.

        The housing identifier is the last '-'-separated component of
        self.url. The <h1> text is split on ' - ': the first part is used as
        the title and the second part is parsed as the cost. The area is
        taken from the title when it matches the expected pattern; date,
        station, location and phone default to NotAvailable.

        :returns: the populated Housing object
        """
        div = self.parser.select(self.document.getroot(), 'div#annonce_detail',
                                 1)
        housing = Housing(self.url.split('-')[-1])

        parts = div.find('h1').text.split(' - ')
        housing.title = parts[0].strip()
        # Strip whitespace and the euro sign, drop '.' (treated as a grouping
        # separator) and turn ',' into the decimal point expected by Decimal.
        housing.cost = Decimal(parts[1].strip(u' \t\u20ac\xa0€\n\r').replace(
            '.', '').replace(',', '.'))
        housing.currency = u'€'

        # Raw string so that \w and \d are not (invalid) string escapes; the
        # regex engine itself interprets \xa0 (no-break space) and \xb2
        # (superscript two).
        m = re.match(r'(\w+) (.+) (\d+)\xa0m\xb2 (.*)', housing.title)
        if m:
            housing.area = Decimal(m.group(3))

        housing.date = housing.station = housing.location = housing.phone = NotAvailable

        metro = div.cssselect('p.metro')
        # An empty <p class="metro"> has text None; keep NotAvailable then.
        if metro and metro[0].text:
            housing.station = unicode(metro[0].text.strip())

        p = div.cssselect('p.annonce-detail-texte')[0]
        b = p.findall('b')
        if b:
            # The description is the text following the first <b>; the first
            # <b> holds the location and an optional second one the phone.
            housing.text = (b[0].tail or u'').strip()
            # Guard against empty tags so the literal u'None' is never stored.
            if b[0].text:
                housing.location = unicode(b[0].text)
            if len(b) > 1 and b[1].text:
                housing.phone = unicode(b[1].text)
        else:
            housing.text = (p.text or u'').strip()

        housing.details = NotAvailable
        housing.photos = NotAvailable

        return housing
Ejemplo n.º 2
0
    def get_housing(self):
        """
        Build a Housing object from the 'div#annonce_detail' element.

        The housing identifier is the last '-'-separated component of
        self.url. The <h1> text is split on ' - ': the first part is used as
        the title and the second part is parsed as the cost. The area is
        taken from the title when it matches the expected pattern; date,
        station, location and phone default to NotAvailable.

        :returns: the populated Housing object
        """
        div = self.parser.select(self.document.getroot(), 'div#annonce_detail', 1)
        housing = Housing(self.url.split('-')[-1])

        parts = div.find('h1').text.split(' - ')
        housing.title = parts[0].strip()
        # Strip whitespace and the euro sign, drop '.' (treated as a grouping
        # separator) and turn ',' into the decimal point expected by Decimal.
        housing.cost = Decimal(parts[1].strip(u' \t\u20ac\xa0€\n\r').replace('.', '').replace(',', '.'))
        housing.currency = u'€'

        # Raw string so that \w and \d are not (invalid) string escapes; the
        # regex engine itself interprets \xa0 (no-break space) and \xb2
        # (superscript two).
        m = re.match(r'(\w+) (.+) (\d+)\xa0m\xb2 (.*)', housing.title)
        if m:
            housing.area = Decimal(m.group(3))

        housing.date = housing.station = housing.location = housing.phone = NotAvailable

        metro = div.cssselect('p.metro')
        # An empty <p class="metro"> has text None; keep NotAvailable then.
        if metro and metro[0].text:
            housing.station = unicode(metro[0].text.strip())

        p = div.cssselect('p.annonce-detail-texte')[0]
        b = p.findall('b')
        if b:
            # The description is the text following the first <b>; the first
            # <b> holds the location and an optional second one the phone.
            housing.text = (b[0].tail or u'').strip()
            # Guard against empty tags so the literal u'None' is never stored.
            if b[0].text:
                housing.location = unicode(b[0].text)
            if len(b) > 1 and b[1].text:
                housing.phone = unicode(b[1].text)
        else:
            housing.text = (p.text or u'').strip()

        housing.details = NotAvailable
        housing.photos = NotAvailable

        return housing
Ejemplo n.º 3
0
    def iter_housings(self):
        """
        Iterate over the housings found in the 'div.annonce-resume' elements.

        Entries whose 'td.lien-annonce' cell contains no <a> link are
        skipped. For each remaining entry the identifier is the last
        '-'-separated component of the link target; title, area, cost, date,
        station, text and location are read from the entry when present.

        :returns: generator yielding Housing objects
        """
        # Compiled once for all entries. Raw string so that \w and \d are not
        # (invalid) string escapes; the regex engine interprets \xa0 and \xb2.
        area_re = re.compile(r'(\w+) (.+) (\d+)\xa0m\xb2 (.*)')

        for div in self.document.getroot().cssselect('div.annonce-resume'):
            a = div.cssselect('td.lien-annonce')[0].find('a')
            if a is None:
                # not a real announce.
                continue

            housing = Housing(a.attrib['href'].split('-')[-1])
            # a.text is None when the link has no leading text.
            housing.title = (a.text or u'').strip()
            m = area_re.match(housing.title)
            if m:
                housing.area = Decimal(m.group(3))

            # Strip whitespace and the euro sign, drop '.' (treated as a
            # grouping separator) and turn ',' into the decimal point.
            housing.cost = Decimal(
                div.cssselect('td.prix')[0].text.strip(
                    u' \t\u20ac\xa0€\n\r').replace('.', '').replace(',', '.'))
            housing.currency = u'€'

            m = self.DATE_RE.match(
                div.cssselect('p.date-publication')[0].text.strip())
            if m:
                date = m.group(1)
                # Substitute each key of self.MONTHS by its value before
                # handing the string to parse_date.
                for fr, en in self.MONTHS.iteritems():
                    date = date.replace(fr, en)
                housing.date = parse_date(date)

            metro = div.cssselect('p.metro')
            # An empty <p class="metro"> has text None.
            if metro and metro[0].text:
                housing.station = unicode(metro[0].text.strip())
            else:
                housing.station = NotAvailable

            p = div.cssselect('p.annonce-resume-texte')[0]
            b = p.findall('b')
            if b:
                # The description follows the first <b>, which holds the
                # location.
                housing.text = (b[0].tail or u'').strip()
                # Skip an empty <b> so the literal u'None' is never stored.
                if b[0].text:
                    housing.location = unicode(b[0].text)
            else:
                housing.text = (p.text or u'').strip()

            housing.photos = NotAvailable

            yield housing
Ejemplo n.º 4
0
    def get_housing(self, housing=None):
        """
        Fill a Housing object from the first <detailannonce> element.

        :param housing: existing Housing object to fill; when None, a new one
                        is created from self.groups[0]
        :returns: the filled Housing object, or None when the document has no
                  <detailannonce> element or that element has no <titre>
                  child
        """
        if housing is None:
            housing = Housing(self.groups[0])

        nodes = self.document.getroot().xpath('//detailannonce')
        if not nodes:
            # Same outcome as a missing title: nothing to describe.
            return None
        details = nodes[0]
        if details.find('titre') is None:
            return None

        housing.title = unicode(details.find('titre').text)
        # An empty <descriptif/> has text None.
        housing.text = (details.find('descriptif').text or u'').strip()
        housing.cost = Decimal(details.find('prix').text)
        housing.currency = u'€'
        housing.date = parse_date(details.find('dtfraicheur').text)
        housing.area = Decimal(details.find('surface').text)

        # Optional string fields: an absent or empty element maps to
        # NotAvailable rather than to the literal u'None'.
        contact = details.find('contact')
        telephone = contact.find('telephone') if contact is not None else None
        if telephone is not None and telephone.text:
            housing.phone = unicode(telephone.text)
        else:
            housing.phone = NotAvailable

        proximite = details.find('proximite')
        if proximite is not None and proximite.text:
            housing.station = unicode(proximite.text)
        else:
            housing.station = NotAvailable

        # Prefer <adresse>; fall back on <quartier> when it is empty.
        location = details.find('adresse').text
        if not location:
            quartier = details.find('quartier')
            if quartier is not None:
                location = quartier.text
        housing.location = unicode(location) if location else NotAvailable

        housing.photos = []
        for photo in details.xpath('./photos/photo'):
            # Prefer <bigurl>, then <stdurl>; skip photos carrying neither.
            url = None
            for tag in ('bigurl', 'stdurl'):
                node = photo.find(tag)
                if node is not None and node.text:
                    url = node.text
                    break
            if url:
                housing.photos.append(HousingPhoto(unicode(url)))

        housing.details = {}
        for detail in details.xpath('./details/detail'):
            housing.details[detail.find(
                'libelle').text.strip()] = detail.find('valeur').text or 'N/A'

        housing.details['Reference'] = details.find('reference').text

        return housing
Ejemplo n.º 5
0
    def get_housing(self, housing=None):
        """
        Fill a Housing object from the first <detailannonce> element.

        :param housing: existing Housing object to fill; when None, a new one
                        is created from self.groups[0]
        :returns: the filled Housing object, or None when the document has no
                  <detailannonce> element or that element has no <titre>
                  child
        """
        if housing is None:
            housing = Housing(self.groups[0])

        nodes = self.document.getroot().xpath('//detailannonce')
        if not nodes:
            # Same outcome as a missing title: nothing to describe.
            return None
        details = nodes[0]
        if details.find('titre') is None:
            return None

        housing.title = unicode(details.find('titre').text)
        # An empty <descriptif/> has text None.
        housing.text = (details.find('descriptif').text or u'').strip()
        housing.cost = Decimal(details.find('prix').text)
        housing.currency = u'€'
        housing.date = parse_date(details.find('dtfraicheur').text)
        housing.area = Decimal(details.find('surface').text)

        # Optional string fields: an absent or empty element maps to
        # NotAvailable rather than to the literal u'None'.
        contact = details.find('contact')
        telephone = contact.find('telephone') if contact is not None else None
        if telephone is not None and telephone.text:
            housing.phone = unicode(telephone.text)
        else:
            housing.phone = NotAvailable

        proximite = details.find('proximite')
        if proximite is not None and proximite.text:
            housing.station = unicode(proximite.text)
        else:
            housing.station = NotAvailable

        # Prefer <adresse>; fall back on <quartier> when it is empty.
        location = details.find('adresse').text
        if not location:
            quartier = details.find('quartier')
            if quartier is not None:
                location = quartier.text
        housing.location = unicode(location) if location else NotAvailable

        housing.photos = []
        for photo in details.xpath('./photos/photo'):
            # Prefer <bigurl>, then <stdurl>; skip photos carrying neither.
            url = None
            for tag in ('bigurl', 'stdurl'):
                node = photo.find(tag)
                if node is not None and node.text:
                    url = node.text
                    break
            if url:
                housing.photos.append(HousingPhoto(unicode(url)))

        housing.details = {}
        for detail in details.xpath('./details/detail'):
            housing.details[detail.find('libelle').text.strip()] = detail.find('valeur').text or 'N/A'

        housing.details['Reference'] = details.find('reference').text

        return housing
Ejemplo n.º 6
0
    def iter_housings(self):
        """
        Iterate over the housings found in the 'div.annonce-resume' elements.

        Entries whose 'td.lien-annonce' cell contains no <a> link are
        skipped. For each remaining entry the identifier is the last
        '-'-separated component of the link target; title, area, cost, date,
        station, text and location are read from the entry when present.

        :returns: generator yielding Housing objects
        """
        # Compiled once for all entries. Raw string so that \w and \d are not
        # (invalid) string escapes; the regex engine interprets \xa0 and \xb2.
        area_re = re.compile(r'(\w+) (.+) (\d+)\xa0m\xb2 (.*)')

        for div in self.document.getroot().cssselect('div.annonce-resume'):
            a = div.cssselect('td.lien-annonce')[0].find('a')
            if a is None:
                # not a real announce.
                continue

            housing = Housing(a.attrib['href'].split('-')[-1])
            # a.text is None when the link has no leading text.
            housing.title = (a.text or u'').strip()
            m = area_re.match(housing.title)
            if m:
                housing.area = Decimal(m.group(3))

            # Strip whitespace and the euro sign, drop '.' (treated as a
            # grouping separator) and turn ',' into the decimal point.
            housing.cost = Decimal(div.cssselect('td.prix')[0].text.strip(u' \t\u20ac\xa0€\n\r').replace('.', '').replace(',', '.'))
            housing.currency = u'€'

            m = self.DATE_RE.match(div.cssselect('p.date-publication')[0].text.strip())
            if m:
                date = m.group(1)
                # Substitute each key of self.MONTHS by its value before
                # handing the string to parse_date.
                for fr, en in self.MONTHS.iteritems():
                    date = date.replace(fr, en)
                housing.date = parse_date(date)

            metro = div.cssselect('p.metro')
            # An empty <p class="metro"> has text None.
            if metro and metro[0].text:
                housing.station = unicode(metro[0].text.strip())
            else:
                housing.station = NotAvailable

            p = div.cssselect('p.annonce-resume-texte')[0]
            b = p.findall('b')
            if b:
                # The description follows the first <b>, which holds the
                # location.
                housing.text = (b[0].tail or u'').strip()
                # Skip an empty <b> so the literal u'None' is never stored.
                if b[0].text:
                    housing.location = unicode(b[0].text)
            else:
                housing.text = (p.text or u'').strip()

            housing.photos = NotAvailable

            yield housing
Ejemplo n.º 7
0
    def iter_housings(self):
        """
        Iterate over the housings described by the <annonce> elements.

        Each Housing is built from the <idannonce>, <titre>, <dtfraicheur>,
        <prix>, <surface>, <descriptif> and <ville> children, plus the
        optional <proximite> child and the <stdurl> of every photo.

        :returns: generator yielding Housing objects
        """
        for annonce in self.document.getroot().xpath('//annonce'):
            housing = Housing(annonce.find('idannonce').text)
            housing.title = unicode(annonce.find('titre').text)
            housing.date = parse_date(annonce.find('dtfraicheur').text)
            housing.cost = Decimal(annonce.find('prix').text)
            housing.currency = u'€'
            housing.area = Decimal(annonce.find('surface').text)
            # An empty <descriptif/> has text None.
            housing.text = unicode(
                (annonce.find('descriptif').text or u'').strip())
            housing.location = unicode(annonce.find('ville').text)

            # An absent or empty <proximite> maps to NotAvailable rather than
            # to the literal u'None'.
            proximite = annonce.find('proximite')
            if proximite is not None and proximite.text:
                housing.station = unicode(proximite.text)
            else:
                housing.station = NotAvailable

            housing.photos = []
            for photo in annonce.xpath('./photos/photo'):
                stdurl = photo.find('stdurl')
                # Skip photos without a usable URL.
                if stdurl is not None and stdurl.text:
                    housing.photos.append(HousingPhoto(unicode(stdurl.text)))

            yield housing
Ejemplo n.º 8
0
    def iter_housings(self):
        """
        Iterate over the housings described by the <annonce> elements.

        Each Housing is built from the <idannonce>, <titre>, <dtfraicheur>,
        <prix>, <surface>, <descriptif> and <ville> children, plus the
        optional <proximite> child and the <stdurl> of every photo.

        :returns: generator yielding Housing objects
        """
        for annonce in self.document.getroot().xpath('//annonce'):
            housing = Housing(annonce.find('idannonce').text)
            housing.title = unicode(annonce.find('titre').text)
            housing.date = parse_date(annonce.find('dtfraicheur').text)
            housing.cost = Decimal(annonce.find('prix').text)
            housing.currency = u'€'
            housing.area = Decimal(annonce.find('surface').text)
            # An empty <descriptif/> has text None.
            housing.text = unicode(
                (annonce.find('descriptif').text or u'').strip())
            housing.location = unicode(annonce.find('ville').text)

            # An absent or empty <proximite> maps to NotAvailable rather than
            # to the literal u'None'.
            proximite = annonce.find('proximite')
            if proximite is not None and proximite.text:
                housing.station = unicode(proximite.text)
            else:
                housing.station = NotAvailable

            housing.photos = []
            for photo in annonce.xpath('./photos/photo'):
                stdurl = photo.find('stdurl')
                # Skip photos without a usable URL.
                if stdurl is not None and stdurl.text:
                    housing.photos.append(HousingPhoto(unicode(stdurl.text)))

            yield housing