def get_availability_paris(start=11000):
    """Fetch the availability calendar of the listings stored in ``airbnb``.

    Listing ids are read from the ``airbnb`` table and handed one by one to
    ``get_availability``, in the order the query returns them.

    Args:
        start: Non-negative position of the first listing to process; rows
            before it are skipped. Defaults to 11000.
    """
    bdd = Pablo()
    try:
        # A single query provides both the total count and the rows to walk.
        bdd.executerReq("SELECT id_airbnb FROM airbnb")
        listings = bdd.resultatReq()
    finally:
        # The rows are already in memory, so the connection is not needed
        # while the listings are processed.
        bdd.close()

    total = len(listings)
    for position, listing in enumerate(listings[start:], start=start):
        print("%s on %s listings" % (position, total))
        get_availability(listing[0])
        # Pause between two listings.
        time.sleep(0.5)
def get_users_paris():
    """Fetch and store the profile of review recipients.

    Reads distinct ``recipient_id`` values from ``airbnb_review`` (the query
    is capped by ``limit 3``) and calls ``get_user`` on each of them.
    """
    # NOTE: review authors (author_id) are not processed, only recipients.
    bdd = Pablo()
    try:
        bdd.executerReq("SELECT DISTINCT recipient_id FROM airbnb_review limit 3")
        for user in bdd.resultatReq():
            get_user(user[0])
    finally:
        # Release the connection even when get_user raises.
        bdd.close()
def main():
    """Write each listing's review count into ``airbnb_rev_june.nb_reviews``.

    Counts the rows of ``airbnb_review_global`` whose ``date_creation`` is
    strictly between 20170531 and 20170701, grouped by ``listing_id``, then
    updates the ``airbnb_rev_june`` row with the same ``listing_id``.

    Only UPDATE statements are issued: rows must already exist in
    ``airbnb_rev_june`` for a count to be stored.
    """
    count_req = """SELECT COUNT(id) as nb_rev, listing_id
                   FROM airbnb_review_global
                   WHERE date_creation > 20170531 and date_creation < 20170701
                   group by listing_id"""
    update_req = """UPDATE airbnb_rev_june SET nb_reviews = %s
                    WHERE listing_id = %s"""

    bdd = Pablo()
    try:
        bdd.executerReq(count_req)
        for nb_reviews, listing_id in bdd.resultatReq():
            bdd.exec_req_with_args(update_req, (nb_reviews, listing_id))
    finally:
        # Release the connection whether or not the updates succeeded.
        bdd.close()
def _reverse_geocode_city(longitude, latitude):
    """Return the city api-adresse.data.gouv.fr reports for a GPS point.

    Returns None when the response holds no feature or no ``city`` property.
    """
    # Example: http://api-adresse.data.gouv.fr/reverse/?lon=2.37&lat=48.357
    url_req = "http://api-adresse.data.gouv.fr/reverse/?lon=%s&lat=%s" % (longitude, latitude)
    # Response.json() decodes the body; json.loads() cannot take a Response.
    address_json = requests.get(url_req).json()
    features = address_json.get('features') or []
    if not features:
        return None
    return features[0]['properties'].get('city')


def search_on_all_squares():
    """Insert into ``airbnb`` every listing found on every search square.

    For each coordinate tuple returned by ``get_square``, the listings
    returned by ``get_listings_by_gps`` are inserted one row at a time. The
    stored city is the one returned by the reverse-geocoding API, falling
    back to the listing's own ``localized_city`` when the API gives none.
    """
    insert_req = """INSERT INTO airbnb
                    (id_airbnb, listing_name, rate, review_nb, star_rating,
                     city, superhost, bed_nb, picture_nb, latitude, longitude,
                     business_travel, is_new, date_maj, instant_book)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                            %s, %s, %s)"""
    # NOTE(review): get_listings_from_city_name calls get_squares(city_name);
    # confirm that get_square() is the intended helper here.
    lst_coordinates = get_square()

    bdd = Pablo()
    try:
        # loop on all squares
        for coo in lst_coordinates:
            for appart in get_listings_by_gps(coo[2], coo[3], coo[0], coo[1]):
                infos = appart['listing']
                pricing = appart['pricing_quote']
                name = infos['name']
                latitude = infos['lat']
                longitude = infos['lng']

                # GPS coordinates => city, through the government address API.
                city = _reverse_geocode_city(longitude, latitude)
                if city is None:
                    city = infos['localized_city']

                print(name.encode('utf-8'))
                params = (
                    infos['id'],
                    name,
                    pricing['rate']['amount'],
                    infos['reviews_count'],
                    infos['star_rating'],
                    city,
                    infos['is_superhost'],
                    infos['beds'],
                    infos['picture_count'],
                    latitude,
                    longitude,
                    infos['is_business_travel_ready'],
                    infos['is_new_listing'],
                    time.strftime("%Y%m%d"),
                    pricing['can_instant_book'],
                )
                bdd.exec_req_with_args(insert_req, params)
    finally:
        # Release the connection even when a request or an insert fails.
        bdd.close()
def get_some_review_paris():
    """Download the reviews of listings missing from the June 2017 extract.

    Selects every ``listing_id`` of ``airbnb_reviews_20k`` that has no row in
    ``airbnb_review_global`` with ``date_creation`` strictly between 20170531
    and 20170701, then calls ``get_all_reviews`` on each of them, walking the
    query result from last row to first.
    """
    req = """SELECT listing_id FROM airbnb_reviews_20k
             WHERE listing_id NOT IN (
                 SELECT DISTINCT listing_id FROM airbnb_review_global
                 WHERE date_creation > 20170531 AND date_creation < 20170701)"""
    bdd = Pablo()
    try:
        bdd.executerReq(req)
        for i, listing in enumerate(bdd.resultatReq()[::-1], start=1):
            print("listing number : %s" % i)
            get_all_reviews(listing[0])
    finally:
        # Release the connection even when a download fails.
        bdd.close()
def get_user(user_id):
    """Fetch one user's profile and insert it into ``airbnb_user``.

    Args:
        user_id: Identifier passed to ``get_user_infos``. The id written to
            the table is the one found in the returned profile.
    """
    insert_query = """INSERT INTO airbnb_user
                      (user_id, name, creation_date, response_rate,
                       response_time, listing_nb, description)
                      VALUES (%s, %s, %s, %s, %s, %s, %s)"""

    # Build the row before opening the connection, so a failed lookup or a
    # missing key does not leave a connection open.
    user = get_user_infos(user_id)
    params = (
        user['id'],
        user['smart_name'],
        user['created_at'][:10],  # first 10 characters only
        user['response_rate'],
        user['response_time'],
        user['listings_count'],
        user['about'],
    )

    bdd = Pablo()
    try:
        bdd.exec_req_with_args(insert_query, params)
    finally:
        bdd.close()
def get_all_reviews(logement_id):
    """Insert every review of a listing into ``airbnb_review_global``.

    Args:
        logement_id: Listing identifier passed to ``get_reviews``.
    """
    insert_query = """INSERT INTO airbnb_review_global
                      (review_id, author_id, listing_id, recipient_id,
                       content, date_creation, language, date_extract)
                      VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"""
    # One extraction date for the whole batch, taken through the `time`
    # module like the other functions of this file.
    date_extract = time.strftime("%Y%m%d")

    bdd = Pablo()
    try:
        for review in get_reviews(logement_id):
            params = (
                review['id'],
                review['author_id'],
                review['listing_id'],
                review['recipient_id'],
                review['comments'],
                review['created_at'][:10],  # first 10 characters only
                review['language'],
                date_extract,
            )
            bdd.exec_req_with_args(insert_query, params)
    finally:
        # Release the connection even when a fetch or an insert fails.
        bdd.close()
def get_availability(listing_id):
    """Store the availability calendar of one listing in ``airbnb_dispo_global``.

    The calendar is requested through ``get_available`` for the current month
    and year; one row is inserted per day it contains.

    Args:
        listing_id: Identifier of the listing to query.

    Returns:
        0 when ``get_available`` returned no calendar, otherwise None (also
        when the request failed with a connection error).
    """
    insert_query = """INSERT INTO airbnb_dispo_global
                      (listing_id, date_dispo, availability, price, date_extract)
                      VALUES (%s, %s, %s, %s, %s)"""

    # init parameters
    today = time.strftime("%Y%m%d")
    month = time.strftime("%m")
    year = time.strftime("%Y")
    count = 2  # presumably the number of months requested -- TODO confirm

    # In case requests fails, or airbnb blocks too many requests: wait, then
    # give up on this listing.
    try:
        calendar = get_available(listing_id, month, year, count)
    except requests.exceptions.ConnectionError:
        time.sleep(200)
        return None
    if calendar is None:
        time.sleep(15)
        return 0

    # The connection is opened only once there is something to store, so the
    # early returns above cannot leave one open.
    bdd = Pablo()
    try:
        for calendar_month in calendar:
            for day in calendar_month['days']:
                availability = 1 if day['available'] else 0
                params = (
                    listing_id,
                    day['date'],
                    availability,
                    day['price']['local_price'],
                    today,
                )
                bdd.exec_req_with_args(insert_query, params)
    finally:
        bdd.close()
    return None
def get_listings_from_city_name(city_name):
    """Insert or refresh in ``airbnb`` every listing found for a city.

    The squares returned by ``get_squares(city_name)`` are searched one by
    one with ``get_listings_by_gps``. Each listing is inserted; when the
    insert raises ``IntegrityError`` the existing row is updated instead.

    Args:
        city_name: City passed to ``get_squares`` and shown in the progress
            output.
    """
    insert_req = """INSERT INTO airbnb (
                        id_airbnb, listing_name, rate, review_nb, star_rating,
                        bed_nb, capacity, room_type, instant_book, superhost,
                        business_travel, is_new, picture_nb, latitude,
                        longitude, city, date_maj)
                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
                            %s, %s, %s, %s, %s)"""
    update_req = """UPDATE airbnb
                    SET rate = %s, review_nb = %s, star_rating = %s,
                        instant_book = %s, superhost = %s,
                        business_travel = %s, is_new = %s, picture_nb = %s,
                        date_maj = %s
                    WHERE id_airbnb = %s"""

    lst_coordinates = get_squares(city_name)
    nb_squares_to_check = len(lst_coordinates)

    bdd = Pablo()
    try:
        # loop on all squares
        for i, coo in enumerate(lst_coordinates, start=1):
            print("\n%s/%s squares on %s" % (i, nb_squares_to_check, city_name))
            for appart in get_listings_by_gps(coo[2], coo[3], coo[0], coo[1]):
                infos = appart['listing']
                pricing = appart['pricing_quote']

                id_airbnb = infos['id']
                name = infos['name']
                rate_amount = pricing['rate']['amount']
                nb_reviews = infos['reviews_count']
                star_rating = infos['star_rating']
                instant_book = pricing['can_instant_book']
                superhost = infos['is_superhost']
                business_travel_ready = infos['is_business_travel_ready']
                is_new = infos['is_new_listing']
                picture_nb = infos['picture_count']
                date_maj = time.strftime("%Y%m%d")

                print(name.encode('utf-8'))
                insert_params = (
                    id_airbnb, name, rate_amount, nb_reviews, star_rating,
                    infos['beds'], infos['person_capacity'],
                    infos['room_type'], instant_book, superhost,
                    business_travel_ready, is_new, picture_nb,
                    infos['lat'], infos['lng'], infos['localized_city'],
                    date_maj,
                )
                try:
                    # Run on the raw cursor so IntegrityError reaches this
                    # handler. NOTE(review): presumably exec_req_with_args
                    # would not propagate it -- confirm.
                    bdd.cursor.execute(insert_req, insert_params)
                except IntegrityError:
                    print("integrity error for %s, updating" % str(id_airbnb))
                    update_params = (
                        rate_amount, nb_reviews, star_rating, instant_book,
                        superhost, business_travel_ready, is_new, picture_nb,
                        date_maj, id_airbnb,
                    )
                    bdd.exec_req_with_args(update_req, update_params)
    finally:
        # Release the connection even when a search or a query fails.
        bdd.close()
def get_new_users(): pass bdd = Pablo()