def safe_city_prices(city):
    """Fetch the Numbeo cost-of-living prices for a city, retrying once.

    city - English city name as it appears in the Numbeo URL (e.g. Moscow).

    Returns the value produced by get_live_prices() for the downloaded
    page, or False when the page could not be downloaded or parsed even
    after a second attempt.
    """
    url = "https://www.numbeo.com/cost-of-living/in/{city}?displayCurrency=RUB".format(
        city=city)

    # Download the page; one more attempt after a random pause if it fails.
    page = get_html(url)
    if not page:
        print("HTML for {city} live_prices doesn't returned".format(city=city))
        time.sleep(get_random_sleep_time())
        page = get_html(url)
    if not page:
        return False

    # First parsing attempt on the page we already have.
    try:
        return get_live_prices(page)
    except Exception as first_error:
        print(first_error)
        print(
            "Wrong HTML for live prices {city} trying again".format(city=city))

    # Second attempt: pause, download a fresh copy and parse it.
    try:
        time.sleep(get_random_sleep_time())
        page = get_html(url)
        return get_live_prices(page)
    except Exception as second_error:
        print(second_error)
        print("Wrong HTML for live prices {city} second time".format(
            city=city))
        return False
def safe_city_prices(city):
    """Download Numbeo prices for a city and store them on its City row.

    city - English city name (e.g. Moscow). It is title-cased for the
    Numbeo URL and lower-cased for the City.eng_name lookup.

    The page download and the parsing are each retried once after a
    random pause. Five prices are then converted to int and written to
    the matching City row, and the session is committed.

    Returns True when the row was updated and committed. Returns False
    when the page could not be downloaded or parsed, when no City row
    matches, or when one of the expected prices is missing or is not
    convertible to int.
    """
    url = f"https://www.numbeo.com/cost-of-living/in/{city.title()}?displayCurrency=RUB"
    html = get_html(url)
    if not html:
        print(f"HTML for {city} live_prices doesn't returned")
        time.sleep(get_random_sleep_time())
        html = get_html(url)
        if not html:
            return False

    try:
        prices = get_live_prices(html)
    except Exception as e:
        print(e)
        print(f"Wrong HTML for live prices {city} trying again")
        try:
            time.sleep(get_random_sleep_time())
            html = get_html(url)
            prices = get_live_prices(html)
        except Exception as e:
            print(e)
            print(f"Wrong HTML for live prices {city} second time")
            return False

    update = City.query.filter_by(eng_name=city.lower()).first()
    if update is None:
        # .first() yields None when nothing matches; without this guard the
        # first attribute assignment below would raise AttributeError.
        print(f"City {city} not found in db, live prices are not saved")
        return False

    # City attribute -> key of the parsed Numbeo price mapping.
    price_fields = (
        ("inexpensive_meal_price", 'Meal, Inexpensive Restaurant'),
        ("restaurant_2_persons",
         'Meal for 2 People, Mid-range Restaurant, Three-course'),
        ("water_033", 'Water (0.33 liter bottle)'),
        ("one_way_ticket", 'One-way Ticket (Local Transport)'),
        ("internet",
         'Internet (60 Mbps or More, Unlimited Data, Cable/ADSL)'),
    )
    # Convert everything before touching the row, so a missing or malformed
    # price cannot leave a half-updated object in the session.
    try:
        new_values = [(attr, int(prices[key])) for attr, key in price_fields]
    except (KeyError, TypeError, ValueError) as e:
        print(e)
        print(f"Live prices for {city} are incomplete, nothing saved")
        return False

    for attr, value in new_values:
        setattr(update, attr, value)
    db.session.commit()
    return True
def test_get_living_prices():
    """Smoke test: the Numbeo page for Moscow parses into a truthy result.

    Downloads the page through get_html() inside the application context
    and checks that get_live_prices() returns something non-empty.
    """
    moscow_url = "https://www.numbeo.com/cost-of-living/in/{}?displayCurrency=RUB".format(
        "Moscow")
    with app.app_context():
        page = get_html(moscow_url)
        parsed = get_live_prices(page)
        assert parsed
def get_all_hotels(city, checkin, checkout):
    """Parse all hotel pages for a city and refresh its weekly averages.

    Walks the paginated search results for the given stay, passing every
    page to get_hotel_information(), then inserts or updates the
    AvgPriceReviews row for the city / week / year of the check-in date.

    params:
        - city: string object, city name; it is lower-cased and matched
          against City.ru_name or City.eng_name
        - checkin: string object, checkin date in format dd/mm/YYYY
        - checkout: string object, checkout date in format dd/mm/YYYY
    return: bool object
        - False when the first page or the page count cannot be obtained,
          or when the city is not present in the db
        - True otherwise (including the early exit when a results page
          cannot be downloaded after three attempts)
    """
    parsing_date = datetime.now(timezone("Europe/Moscow")).strftime("%d/%m/%Y")
    url = get_url(city, checkin, checkout)
    checkin_date = datetime.strptime(checkin, "%d/%m/%Y")
    # %W: week of the year with Monday as the first day of the week.
    week_number = int(checkin_date.strftime("%W"))
    year = checkin_date.year

    html = get_html(url)
    if not html:
        print("First HTML doesn't returned, requesting again")
        time.sleep(get_random_sleep_time())
        html = get_html(url)
        if not html:
            print("First HTML doesn't returned at all")
            return False

    try:
        pages = get_page_count(html)
    except Exception as e:
        print(e)
        print(f"HTML for pages, {city}-{checkin}-{checkout} doesn't returned")
        return False

    print(f"Parsing process {city} - {checkin} - {checkout} - started")
    # NOTE(review): range(pages - 1) visits one page fewer than
    # get_page_count() reports — confirm this is intended.
    for page in range(pages - 1):
        html = get_html(url)
        if not html:
            time.sleep(get_random_sleep_time())
            print(f"HTML for {page + 1}/{pages} doesn't returned, requesting again")
            html = get_html(url)
            if not html:
                time.sleep(get_random_sleep_time())
                print(f"HTML for {page + 1}/{pages} doesn't returned, requesting again 2")
                html = get_html(url)
                if not html:
                    print(f"HTML for {page + 1}/{pages}doesn't returned at all")
                    # NOTE(review): reports success and skips the averages
                    # update below — confirm this is the desired outcome.
                    return True
        try:
            get_hotel_information(html, city, checkin, checkout)
            url = get_next_page_href(html)
        except Exception as e:
            print(e)
            print(f"Page {page + 1}/{pages} crashed, trying again")
            try:
                time.sleep(get_random_sleep_time())
                print(f"Parsing page {page + 1}/{pages} again")
                html = get_html(url)
                get_hotel_information(html, city, checkin, checkout)
                url = get_next_page_href(html)
            except Exception as e:
                print(e)
                print(f"Page {page + 1}/{pages} crashed, second TIME")
                # Give up on this page; the pause below is skipped as well.
                continue
        # print(f"page {page + 1}/{pages} parsed time={datetime.now()}")
        time.sleep(get_random_sleep_time())

    city_row = City.query.filter(or_(City.ru_name == city.lower(),
                                     City.eng_name == city.lower())).first()
    if city_row is None:
        # .first() yields None when nothing matches; reading .id from it
        # would raise AttributeError.
        print(f"City {city} not found in db, averages are not saved")
        return False
    city_id = city_row.id

    # One lookup both answers "does the row exist" and fetches it.
    record = AvgPriceReviews.query.filter(AvgPriceReviews.city_id == city_id) \
        .filter(AvgPriceReviews.week_number == week_number) \
        .filter(AvgPriceReviews.year == year).first()

    # Query each average once and reuse it for the derived per-day price.
    avg_week_price = get_avg_price(city_id, week_number, year)
    avg_reviews = get_avg_reviews(city_id, week_number, year)
    avg_day_price = int(avg_week_price / 7)

    if record is None:
        db.session.add(AvgPriceReviews(
            city_id=city_id,
            avg_reviews=avg_reviews,
            avg_week_price=avg_week_price,
            avg_day_price=avg_day_price,
            parsing_date=parsing_date,
            week_number=week_number,
            year=year)
        )
    else:
        record.avg_week_price = avg_week_price
        record.avg_reviews = avg_reviews
        record.avg_day_price = avg_day_price
        record.parsing_date = parsing_date
    db.session.commit()
    return True
def repeat_get_html(url):
    """Return the result of get_html(url), or False when it is falsy."""
    page = get_html(url)
    return page if page else False
def get_all_hotels(city, checkin, checkout):
    """Parse hotel pages for a city and refresh its weekly averages.

    Walks the paginated results, passing every page to
    get_hotel_information(), then inserts or updates the AvgPriceReviews
    row for the city / week / year of the check-in date.

    params:
        - city: string object, city name; it is title-cased and matched
          against City.ru_name or City.eng_name
        - checkin: string object, checkin date in format dd/mm/YYYY
        - checkout: string object, checkout date in format dd/mm/YYYY
    return: bool object
        - False when a page or the page count cannot be obtained, or when
          the city is not present in the db
        - True when the averages were saved
    """
    parsing_date = datetime.now(timezone("Europe/Moscow")).strftime("%d-%m-%Y")
    # NOTE(review): the per-search URL is disabled and the Booking.com front
    # page is requested instead — confirm this is intended.
    # url = get_url(city, checkin, checkout)
    url = "https://www.booking.com/"
    checkin_date = datetime.strptime(checkin, "%d/%m/%Y")
    # %W: week of the year with Monday as the first day of the week.
    week_number = int(checkin_date.strftime("%W"))
    year = checkin_date.year

    html = get_html(url)
    if not html:
        print("First HTML doesn't returned, requesting again")
        time.sleep(1)
        html = get_html(url)
        if not html:
            print("First HTML doesn't returned at all")
            return False

    try:
        pages = get_page_count(html)
    except Exception as e:
        # with open(f"errors/Pages for {city} - week={week_number}.html", "w") as f:
        #     f.write(html)
        print(e)
        print(f"HTML for pages, {city}-{checkin}-{checkout} doesn't returned")
        return False

    print(f"Parsing process {city} - {checkin} - {checkout} - started")
    # NOTE(review): range(pages - 1) visits one page fewer than
    # get_page_count() reports — confirm this is intended.
    for page in range(pages - 1):
        html = get_html(url)
        if not html:
            time.sleep(1)
            print(
                f"HTML for {page + 1}/{pages} doesn't returned, requesting again"
            )
            html = get_html(url)
            if not html:
                print(f"HTML for {page + 1}/{pages}doesn't returned at all")
                return False
        try:
            get_hotel_information(html, city, checkin, checkout)
            url = get_next_page_href(html)
        except Exception as e:
            # with open(f"errors/Page {page + 1}/{pages}-{city}-week={week_number}.html", "w") as f:
            #     f.write(html)
            print(e)
            print(f"Page {page + 1}/{pages} crashed, trying again")
            try:
                time.sleep(1)
                print(f"Parsing page {page + 1}/{pages} again")
                html = get_html(url)
                get_hotel_information(html, city, checkin, checkout)
                url = get_next_page_href(html)
            except Exception as e:
                print(e)
                print(f"Page {page + 1}/{pages} crashed, second TIME")
                # Give up on this page; the pause below is skipped as well.
                continue
        time.sleep(3)

    city_row = City.query.filter(
        or_(City.ru_name == city.title(),
            City.eng_name == city.title())).first()
    if city_row is None:
        # .first() yields None when nothing matches; reading .id from it
        # would raise AttributeError.
        print(f"City {city} not found in db, averages are not saved")
        return False

    # One lookup both answers "does the row exist" and fetches it.
    record = AvgPriceReviews.query.filter(AvgPriceReviews.city_id == city_row.id) \
        .filter(AvgPriceReviews.week_number == week_number) \
        .filter(AvgPriceReviews.year == year).first()

    # Query each average once and reuse it for the derived per-day price.
    avg_week_price = get_avg_price(city_row.id, week_number, year)
    avg_reviews = get_avg_reviews(city_row.id, week_number, year)
    avg_day_price = int(avg_week_price / 7)

    if record is None:
        db.session.add(
            AvgPriceReviews(
                city=city_row,
                avg_reviews=avg_reviews,
                avg_week_price=avg_week_price,
                avg_day_price=avg_day_price,
                parsing_date=parsing_date,
                week_number=week_number,
                year=year))
    else:
        record.avg_week_price = avg_week_price
        record.avg_reviews = avg_reviews
        record.avg_day_price = avg_day_price
        record.parsing_date = parsing_date
    db.session.commit()
    return True