def __find_similar_ad_from_pic(self, picture):
    """Look for a stored ad whose picture is perceptually close to ``picture``.

    The picture at URL ``picture`` is downloaded and hashed with ``phash``.
    That hash is compared with the ``picturehash`` of every stored
    ``Annonce``; the difference between two hashes is compared against
    ``self.HASH_SIMILAR_TRESHOLD``.

    :param picture: URL of the picture to compare.
    :return: the first ``Annonce`` whose stored hash is closer than the
        threshold, or ``False`` when no stored ad is similar.
    """
    # Close the HTTP response as soon as the hash is computed instead of
    # leaving the connection open until garbage collection.
    with urlopen(picture) as response:
        new_hash = phash(Image.open(response))

    # Ads without a stored hash cannot be compared, so drop them up front.
    known_hashes = [
        ad.picturehash
        for ad in Annonce.select()
        if ad.picturehash is not None
    ]

    for old_hash in known_hashes:
        if hex_to_hash(old_hash) - new_hash < self.HASH_SIMILAR_TRESHOLD:
            return Annonce.get(Annonce.picturehash == old_hash)

    # Only give up once every stored hash has been checked.
    return False
def search(parameters):
    """Query the Leboncoin search API and store the ads not yet in the database.

    For every ad returned by the search, the ad is looked up by its
    ``'lbc-<list_id>'`` identifier. When it is not stored yet, its detail
    page is fetched from the API and a new ``Annonce`` row is created.

    :param parameters: mapping read with the keys ``'surface'`` and
        ``'price'`` (each indexed as ``[0]`` = min, ``[1]`` = max) and
        ``'cities'`` (an iterable whose items carry the zipcode at index 1).
    """
    # Build the search request body.
    payload = {
        "limit": 35,
        "limit_alu": 3,
        "filters": {
            "enums": {"ad_type": ["offer"]},
            "category": {"id": "10"},
            "location": {
                "locations": [
                    {'zipcode': str(cp[1])} for cp in parameters['cities']
                ],
            },
            "ranges": {
                "square": {
                    "min": parameters['surface'][0],
                    "max": parameters['surface'][1],
                },
                "price": {
                    "min": parameters['price'][0],
                    "max": parameters['price'][1],
                },
            },
            "keywords": {},
        },
    }

    header = {'api_key': 'ba0c2dad52b3ec'}
    request = requests.post("https://api.leboncoin.fr/finder/search",
                            json=payload, headers=header)
    data = request.json()

    # A response without an 'ads' key is treated as "no results".
    for ad in data.get('ads') or []:
        try:
            Annonce.get(id='lbc-' + str(ad['list_id']))
        except Annonce.DoesNotExist:
            # Only "not found" means the ad must be fetched; any other
            # error (database, malformed payload) is allowed to surface.
            pass
        else:
            # Already stored: skip the extra detail request.
            continue

        _request = requests.get(
            "https://api.leboncoin.fr/finder/classified/" + str(ad['list_id']),
            headers=header)
        _data = _request.json()

        rooms, surface = 0, 0
        for param in _data.get('attributes') or []:
            if param['key'] == 'rooms':
                rooms = param['value']
            if param['key'] == 'square':
                surface = str(param['value']).replace(" m²", "")

        # ``surface`` is still the int 0 when the ad has no 'square'
        # attribute, so go through str() before the numeric check.
        if not str(surface).replace('.', '', 1).isdigit():
            surface = 0

        zipcode = _data.get('zipcode')
        images = _data.get('images') or {}

        annonce, created = Annonce.get_or_create(
            id='lbc-' + str(_data.get('list_id')),
            defaults={
                # Kept as an equality test so non-bool values behave
                # exactly as before.
                'site': "Leboncoin Pro"
                if ad['owner']['no_salesmen'] == False  # noqa: E712
                else "Leboncoin Particulier",
                'created': datetime.strptime(
                    _data.get('first_publication_date'), "%Y-%m-%d %H:%M:%S"),
                # BeautifulSoup is used here to reduce the markup to text.
                'title': BeautifulSoup(_data.get('subject'), "lxml").text,
                'description': BeautifulSoup(
                    _data.get('body').replace("<br />", "\n"), "lxml").text,
                'telephone': _data.get("phone"),
                'price': _data.get('price')[0],
                'surface': surface,
                'rooms': rooms,
                'city': zipcode if zipcode is not None else '',
                'link': "https://www.leboncoin.fr/locations/%s.htm?ca=12_s"
                        % _data.get('list_id'),
                'picture': images.get('urls_large', []),
            })

        if created:
            annonce.save()
def search(parameters):
    """Query the SeLoger web service and store the ads not yet in the database.

    For every ``annonces/annonce`` node of the search response, the ad is
    looked up by its ``'seloger-<idAnnonce>'`` identifier. When it is not
    stored yet, a second request fetches its detail document (description
    and phone number) and a new ``Annonce`` row is created.

    :param parameters: mapping read with the keys ``'price'``,
        ``'surface'``, ``'rooms'`` and ``'bedrooms'`` (each indexed as
        ``[0]`` = min, ``[1]`` = max), ``'cities'`` (an iterable whose items
        carry the city code at index 2) and ``'seloger'`` (a mapping of
        extra query parameters merged into the request).
    """
    # Build the query parameters.
    payload = {
        'px_loyermin': parameters['price'][0],
        'px_loyermax': parameters['price'][1],
        'surfacemin': parameters['surface'][0],
        'surfacemax': parameters['surface'][1],
        # e.g. parameters['rooms'] = (2, 4) gives [2, 3, 4]
        'nbpieces': list(range(parameters['rooms'][0],
                               parameters['rooms'][1] + 1)),
        # e.g. parameters['bedrooms'] = (2, 4) gives [2, 3, 4]
        'nb_chambres': list(range(parameters['bedrooms'][0],
                                  parameters['bedrooms'][1] + 1)),
        'ci': [int(cp[2]) for cp in parameters['cities']],
    }

    # Merge in the SeLoger-specific parameters supplied by the caller.
    payload.update(parameters['seloger'])

    headers = {'user-agent': 'Dalvik/2.1.0 (Linux; U; Android 6.0.1; D5803 Build/MOB30M.Z1)'}

    request = requests.get(
        "http://ws-seloger.svc.groupe-seloger.com/search_4.0.xml",
        params=payload, headers=headers)

    xml_root = ET.fromstring(request.text)

    for annonceNode in xml_root.findall('annonces/annonce'):
        ad_id = 'seloger-' + annonceNode.find('idAnnonce').text

        try:
            Annonce.get(id=ad_id)
        except Annonce.DoesNotExist:
            # Only "not found" means the ad must be fetched; any other
            # error is allowed to surface.
            pass
        else:
            # Already stored: skip the detail request.
            continue

        # Second request to get the description of the ad.
        _payload = {'noAudiotel': 1,
                    'idAnnonce': annonceNode.findtext('idAnnonce')}
        _request = requests.get(
            "http://ws-seloger.svc.groupe-seloger.com/annonceDetail_4.0.xml",
            params=_payload, headers=headers)
        # Parse the detail document once and reuse it for every field.
        detail_root = ET.fromstring(_request.text)

        # An ad without a <photos> node simply has no pictures.
        photos_node = annonceNode.find("photos")
        if photos_node is None:
            photos = []
        else:
            photos = [photo.findtext("stdUrl") for photo in photos_node]

        # SeLoger may not provide a title for an ad.
        title = annonceNode.findtext('titre')
        if title is None:
            title = "Appartement " + annonceNode.findtext('nbPiece') + " pièces"

        annonce, created = Annonce.get_or_create(
            id=ad_id,
            site='SeLoger',
            defaults={
                'title': title,
                'description': detail_root.findtext("descriptif"),
                'telephone': detail_root.findtext("contact/telephone"),
                'created': datetime.strptime(
                    annonceNode.findtext('dtCreation'), '%Y-%m-%dT%H:%M:%S'),
                'price': annonceNode.find('prix').text,
                'charges': annonceNode.find('charges').text,
                'surface': annonceNode.find('surface').text,
                'rooms': annonceNode.find('nbPiece').text,
                'bedrooms': annonceNode.find('nbChambre').text,
                'city': annonceNode.findtext('ville'),
                'link': annonceNode.findtext('permaLien'),
                'picture': photos,
            }
        )

        if created:
            annonce.save()