Esempio n. 1
0
 def __find_similar_ad_from_pic(self, picture):
     """Look for a stored ad whose picture hash is close to *picture*'s.

     The image at the URL ``picture`` is downloaded and hashed with
     ``phash``; every stored ``Annonce`` that has a ``picturehash`` is then
     compared against it.

     Args:
         picture: URL of the image to compare.

     Returns:
         The first ``Annonce`` whose hash differs from the new one by less
         than ``self.HASH_SIMILAR_TRESHOLD``, or ``False`` when none does.
     """
     # Hash inside the ``with`` block: the image is read lazily from the
     # response, which must be closed afterwards to release the connection.
     with urlopen(picture) as response:
         new_hash = phash(Image.open(response))

     for ad in Annonce.select():
         old_hash = ad.picturehash
         if old_hash is None:
             # Ads stored without a picture hash cannot be compared.
             continue
         # NOTE(review): presumably imagehash, where subtracting two hashes
         # yields their Hamming distance -- confirm against the imports.
         if hex_to_hash(old_hash) - new_hash < self.HASH_SIMILAR_TRESHOLD:
             return ad

     # Only give up once *every* stored hash has been checked; returning
     # from inside the loop would stop after the first comparison.
     return False
Esempio n. 2
0
def search(parameters):
    """Query the Leboncoin finder API and store the ads not yet in the database.

    For every ad returned by the search endpoint that has no ``Annonce`` row
    with id ``'lbc-<list_id>'``, the ad's detail endpoint is fetched and a
    new ``Annonce`` is created from it.

    Args:
        parameters: mapping read with the keys ``'surface'`` and ``'price'``
            (each indexed ``[0]`` for the minimum and ``[1]`` for the
            maximum) and ``'cities'`` (an iterable whose items carry the zip
            code at index 1).

    Returns:
        None. Results are persisted through the ``Annonce`` model.
    """
    # Build the request payload
    payload = {
        "limit": 35,
        "limit_alu": 3,
        "filters": {
            "enums": {
                "ad_type": ["offer"]
            },
            "category": {
                "id": "10"
            },
            "location": {
                "locations": [
                    {'zipcode': str(cp[1])} for cp in parameters['cities']
                ]
            },
            "ranges": {
                "square": {
                    "min": parameters['surface'][0],
                    "max": parameters['surface'][1]
                },
                "price": {
                    "min": parameters['price'][0],
                    "max": parameters['price'][1]
                }
            },
            "keywords": {}
        }
    }

    header = {'api_key': 'ba0c2dad52b3ec'}

    request = requests.post("https://api.leboncoin.fr/finder/search",
                            json=payload,
                            headers=header)

    data = request.json()

    for ad in data['ads']:
        # Skip ads already stored. Only a missing row is an expected
        # outcome here; any other error must not be silently swallowed.
        try:
            Annonce.get(id='lbc-' + str(ad['list_id']))
        except Annonce.DoesNotExist:
            pass
        else:
            continue

        # The search result is only a summary: fetch the full ad.
        _request = requests.get(
            "https://api.leboncoin.fr/finder/classified/" +
            str(ad['list_id']),
            headers=header)

        _data = _request.json()

        # ``surface`` starts as an empty string so that the string-based
        # numeric check below also works when the ad has no 'square'
        # attribute (it then falls back to 0).
        rooms, surface = 0, ''

        # 'attributes' may be absent from the detail response.
        for param in _data.get('attributes') or []:
            if param['key'] == 'rooms':
                rooms = param['value']
            elif param['key'] == 'square':
                surface = param['value'].replace(" m²", "")

        zipcode = _data.get('zipcode')

        annonce, created = Annonce.get_or_create(
            id='lbc-' + str(_data.get('list_id')),
            defaults={
                'site':
                "Leboncoin Pro" if ad['owner']['no_salesmen'] == False else
                "Leboncoin Particulier",
                'created':
                datetime.strptime(_data.get('first_publication_date'),
                                  "%Y-%m-%d %H:%M:%S"),
                # Subject and body go through BeautifulSoup to keep only
                # their text content.
                'title':
                BeautifulSoup(_data.get('subject'), "lxml").text,
                'description':
                BeautifulSoup(
                    _data.get('body').replace("<br />", "\n"),
                    "lxml").text,
                'telephone':
                _data.get("phone"),
                'price':
                _data.get('price')[0],
                # Keep the surface only when it looks like a plain
                # (optionally decimal) number.
                'surface':
                surface if surface.replace('.', '', 1).isdigit() else 0,
                'rooms':
                rooms,
                'city':
                zipcode if zipcode is not None else '',
                'link':
                "https://www.leboncoin.fr/locations/%s.htm?ca=12_s" %
                _data.get('list_id'),
                'picture':
                _data['images']['urls_large']
                if 'urls_large' in _data['images'] else []
            })

        if created:
            annonce.save()
Esempio n. 3
0
def _node_text(node, tag):
    """Return the text of the first *tag* child of *node*, or None if absent."""
    child = node.find(tag)
    return None if child is None else child.text


def search(parameters):
    """Query the SeLoger web service and store the ads not yet in the database.

    For every ``annonces/annonce`` node of the search response that has no
    ``Annonce`` row with id ``'seloger-<idAnnonce>'``, the detail endpoint is
    fetched (for the description and phone number) and a new ``Annonce`` is
    created.

    Args:
        parameters: mapping read with the keys ``'price'``, ``'surface'``,
            ``'rooms'`` and ``'bedrooms'`` (each indexed ``[0]`` for the
            minimum and ``[1]`` for the maximum), ``'cities'`` (an iterable
            whose items carry, at index 2, the value sent as ``ci``) and
            ``'seloger'`` (extra query parameters merged into the request).

    Returns:
        None. Results are persisted through the ``Annonce`` model.
    """
    # Build the request parameters
    payload = {
        'px_loyermin': parameters['price'][0],
        'px_loyermax': parameters['price'][1],
        'surfacemin': parameters['surface'][0],
        'surfacemax': parameters['surface'][1],
        # parameters['rooms'] = (2, 4) gives [2, 3, 4]; requests sends a
        # list value as a repeated query parameter.
        # NOTE(review): an earlier comment described the value as "2,3,4" --
        # confirm which encoding the API expects.
        'nbpieces': list(range(parameters['rooms'][0], parameters['rooms'][1] + 1)),
        # Same inclusive expansion for the bedroom range.
        'nb_chambres': list(range(parameters['bedrooms'][0], parameters['bedrooms'][1] + 1)),
        'ci': [int(cp[2]) for cp in parameters['cities']]
    }
    # Merge the SeLoger-specific parameters
    payload.update(parameters['seloger'])

    headers = {'user-agent': 'Dalvik/2.1.0 (Linux; U; Android 6.0.1; D5803 Build/MOB30M.Z1)'}

    request = requests.get("http://ws-seloger.svc.groupe-seloger.com/search_4.0.xml", params=payload, headers=headers)

    xml_root = ET.fromstring(request.text)

    for annonceNode in xml_root.findall('annonces/annonce'):
        ad_id = 'seloger-' + annonceNode.find('idAnnonce').text

        # Skip ads already stored, which also avoids the second API request.
        # Only a missing row is an expected outcome here; any other error
        # must not be silently swallowed.
        try:
            Annonce.get(id=ad_id)
        except Annonce.DoesNotExist:
            pass
        else:
            continue

        # Second request to get the ad's description and phone number
        _payload = {'noAudiotel': 1, 'idAnnonce': annonceNode.findtext('idAnnonce')}
        _request = requests.get("http://ws-seloger.svc.groupe-seloger.com/annonceDetail_4.0.xml", params=_payload, headers=headers)
        # Parse the detail document once and reuse it for both fields.
        detail = ET.fromstring(_request.text)

        # An ad may come without a <photos> node.
        photos_node = annonceNode.find("photos")
        photos = []
        if photos_node is not None:
            photos = [photo.findtext("stdUrl") for photo in photos_node]

        # SeLoger may not provide a title for an ad; build one from the
        # room count in that case.
        title = annonceNode.findtext('titre')
        if title is None:
            title = "Appartement " + annonceNode.findtext('nbPiece') + " pièces"

        annonce, created = Annonce.get_or_create(
            id=ad_id,
            site='SeLoger',
            defaults={
                'title': title,
                'description': detail.findtext("descriptif"),
                'telephone': detail.findtext("contact/telephone"),
                'created': datetime.strptime(annonceNode.findtext('dtCreation'), '%Y-%m-%dT%H:%M:%S'),
                # _node_text yields None instead of raising when a node is
                # missing from the response.
                'price': _node_text(annonceNode, 'prix'),
                'charges': _node_text(annonceNode, 'charges'),
                'surface': _node_text(annonceNode, 'surface'),
                'rooms': _node_text(annonceNode, 'nbPiece'),
                'bedrooms': _node_text(annonceNode, 'nbChambre'),
                'city': annonceNode.findtext('ville'),
                'link': annonceNode.findtext('permaLien'),
                'picture': photos
            }
        )

        if created:
            annonce.save()