def safe_city_prices(city):
    """Download and parse the Numbeo cost-of-living page for a city.

    city - eng city name (Moscow)

    Both the download and the parsing are attempted at most twice, with a
    random pause before each second attempt; the second parse works on a
    freshly downloaded page.

    Returns the result of get_live_prices, or False when the page could
    not be downloaded or parsed after the retry.
    """
    url = "https://www.numbeo.com/cost-of-living/in/{city}?displayCurrency=RUB".format(
        city=city)

    # Download step: one retry after a pause.
    page = get_html(url)
    if not page:
        print("HTML for {city} live_prices doesn't returned".format(city=city))
        time.sleep(get_random_sleep_time())
        page = get_html(url)
    if not page:
        return False

    # Parse step: first attempt on the page we already have.
    try:
        return get_live_prices(page)
    except Exception as first_error:
        print(first_error)
        print(
            "Wrong HTML for live prices {city} trying again".format(city=city))

    # Parse step: second attempt on a fresh download.
    try:
        time.sleep(get_random_sleep_time())
        return get_live_prices(get_html(url))
    except Exception as second_error:
        print(second_error)
        print("Wrong HTML for live prices {city} second time".format(
            city=city))
        return False
Ejemplo n.º 2
0
def safe_city_prices(city):
    """Download Numbeo living prices for a city and save them on its City row.

    city - eng city name (Moscow)

    The page download and the parsing are each attempted at most twice,
    with a random pause before the second attempt.

    Returns True when the prices were committed to the db. Returns False
    when the page could not be downloaded or parsed, when no City row has
    eng_name equal to the lower-cased city, or when an expected price is
    missing or cannot be converted to int.
    """

    url = f"https://www.numbeo.com/cost-of-living/in/{city.title()}?displayCurrency=RUB"
    html = get_html(url)
    if not html:
        print(f"HTML for {city} live_prices doesn't returned")
        time.sleep(get_random_sleep_time())
        html = get_html(url)
        if not html:
            return False
    try:
        prices = get_live_prices(html)
    except Exception as e:
        print(e)
        print(f"Wrong HTML for live prices {city} trying again")
        try:
            time.sleep(get_random_sleep_time())
            html = get_html(url)
            prices = get_live_prices(html)
        except Exception as e:
            print(e)
            print(f"Wrong HTML for live prices {city} second time")
            return False

    # .first() returns None when nothing matches; fail the same way as the
    # other error paths instead of raising AttributeError below.
    city_row = City.query.filter_by(eng_name=city.lower()).first()
    if city_row is None:
        print(f"City {city} not found in db, live prices not saved")
        return False

    # Convert every price before touching the row, so a missing or
    # malformed value cannot leave a half-updated object in the session.
    try:
        inexpensive_meal_price = int(prices['Meal, Inexpensive Restaurant'])
        restaurant_2_persons = int(
            prices['Meal for 2 People, Mid-range Restaurant, Three-course'])
        water_033 = int(prices['Water (0.33 liter bottle)'])
        one_way_ticket = int(prices['One-way Ticket (Local Transport)'])
        internet = int(
            prices['Internet (60 Mbps or More, Unlimited Data, Cable/ADSL)'])
    except (KeyError, TypeError, ValueError) as e:
        print(e)
        print(f"Unexpected live prices data for {city}, nothing saved")
        return False

    city_row.inexpensive_meal_price = inexpensive_meal_price
    city_row.restaurant_2_persons = restaurant_2_persons
    city_row.water_033 = water_033
    city_row.one_way_ticket = one_way_ticket
    city_row.internet = internet
    db.session.commit()
    return True
Ejemplo n.º 3
0
def test_get_living_prices():
    """Check that the Moscow cost-of-living page parses to a truthy result."""
    url_template = "https://www.numbeo.com/cost-of-living/in/{}?displayCurrency=RUB"
    with app.app_context():
        page = get_html(url_template.format("Moscow"))
        prices = get_live_prices(page)
        assert prices
Ejemplo n.º 4
0
def get_all_hotels(city, checkin, checkout):
    """Parse the hotel result pages for a city and date range, then store
       the averages for the check-in week in the db.

    params:
    - city: string object, city name; it is lower-cased and matched
      against both City.ru_name and City.eng_name
    - checkin: string object, checkin date in format dd/mm/YYYY
    - checkout: string object, checkout date in format dd/mm/YYYY

    return: bool object "False or True"
    - False: the first page could not be downloaded, the page count could
      not be read, or the city was not found in the db
    - True: the averages were saved, or a later page could not be
      downloaded after three attempts (averages are NOT refreshed then)
    """

    parsing_date = datetime.now(timezone("Europe/Moscow")).strftime("%d/%m/%Y")
    url = get_url(city, checkin, checkout)
    # Parse the check-in date once; week number and year both come from it.
    checkin_date = datetime.strptime(checkin, "%d/%m/%Y")
    week_number = int(checkin_date.strftime("%W"))
    year = checkin_date.year

    html = get_html(url)
    if not html:
        print("First HTML doesn't returned, requesting again")
        time.sleep(get_random_sleep_time())
        html = get_html(url)
        if not html:
            print("First HTML doesn't returned at all")
            return False
    try:
        pages = get_page_count(html)
    except Exception as e:
        print(e)
        print(f"HTML for pages, {city}-{checkin}-{checkout} doesn't returned")
        return False
    print(f"Parsing process {city} - {checkin} - {checkout} - started")

    # NOTE(review): the loop runs pages - 1 times - confirm this matches
    # what get_page_count returns.
    for page in range(pages - 1):
        # Up to three download attempts for the current page.
        html = get_html(url)
        if not html:
            time.sleep(get_random_sleep_time())
            print(f"HTML for {page + 1}/{pages} doesn't returned, requesting again")
            html = get_html(url)
            if not html:
                time.sleep(get_random_sleep_time())
                print(f"HTML for {page + 1}/{pages} doesn't returned, requesting again 2")
                html = get_html(url)
                if not html:
                    print(f"HTML for {page + 1}/{pages}doesn't returned at all")
                    # NOTE(review): reports success although the averages
                    # below are skipped - confirm this is intended.
                    return True
        try:
            get_hotel_information(html, city, checkin, checkout)
            url = get_next_page_href(html)
        except Exception as e:
            print(e)
            print(f"Page {page + 1}/{pages} crashed, trying again")
            try:
                time.sleep(get_random_sleep_time())
                print(f"Parsing page {page + 1}/{pages} again")
                html = get_html(url)
                get_hotel_information(html, city, checkin, checkout)
                url = get_next_page_href(html)
            except Exception as e:
                print(e)
                print(f"Page {page + 1}/{pages} crashed, second TIME")
                # url is left unchanged, so the next iteration requests
                # the same page again.
                continue
        time.sleep(get_random_sleep_time())

    # .first() returns None for an unknown city; fail with False instead of
    # raising AttributeError after all the pages have been parsed.
    city_row = City.query.filter(or_(City.ru_name == city.lower(),
                                     City.eng_name == city.lower())).first()
    if city_row is None:
        print(f"City {city} not found in db, averages not saved")
        return False
    city_id = city_row.id

    # One lookup tells us whether the weekly row exists and gives us the
    # row to update.
    avg_row = AvgPriceReviews.query.filter(
        AvgPriceReviews.city_id == city_id,
        AvgPriceReviews.week_number == week_number,
        AvgPriceReviews.year == year).first()

    # Compute each aggregate once and reuse it for the derived day price.
    avg_week_price = get_avg_price(city_id, week_number, year)
    avg_reviews = get_avg_reviews(city_id, week_number, year)
    avg_day_price = int(avg_week_price / 7)

    if avg_row is None:
        db.session.add(AvgPriceReviews(
            city_id=city_id,
            avg_reviews=avg_reviews,
            avg_week_price=avg_week_price,
            avg_day_price=avg_day_price,
            parsing_date=parsing_date,
            week_number=week_number,
            year=year)
        )
    else:
        avg_row.avg_week_price = avg_week_price
        avg_row.avg_reviews = avg_reviews
        avg_row.avg_day_price = avg_day_price
        avg_row.parsing_date = parsing_date
    db.session.commit()
    return True
Ejemplo n.º 5
0
def repeat_get_html(url):
    """Return the HTML fetched by get_html for url, or False if it is falsy.

    Only a single request is made here.
    """
    return get_html(url) or False
Ejemplo n.º 6
0
def get_all_hotels(city, checkin, checkout):
    """Parse hotel result pages and store the averages for the check-in week.

    params:
    - city: string object, city name; it is title-cased and matched
      against both City.ru_name and City.eng_name
    - checkin: string object, checkin date in format dd/mm/YYYY
    - checkout: string object, checkout date (passed through to
      get_hotel_information)

    return: bool object
    - False: a page could not be downloaded after one retry, the page
      count could not be read, or the city was not found in the db
    - True: the averages were saved
    """
    parsing_date = datetime.now(timezone("Europe/Moscow")).strftime("%d-%m-%Y")
    # NOTE(review): the start URL is hard-coded; an earlier revision built
    # it with get_url(city, checkin, checkout) - confirm which is intended.
    url = "https://www.booking.com/"
    # Parse the check-in date once; week number and year both come from it.
    checkin_date = datetime.strptime(checkin, "%d/%m/%Y")
    week_number = int(checkin_date.strftime("%W"))
    year = checkin_date.year

    html = get_html(url)
    if not html:
        print("First HTML doesn't returned, requesting again")
        time.sleep(1)
        html = get_html(url)
        if not html:
            print("First HTML doesn't returned at all")
            return False
    try:
        pages = get_page_count(html)
    except Exception as e:
        print(e)
        print(f"HTML for pages, {city}-{checkin}-{checkout} doesn't returned")
        return False
    print(f"Parsing process {city} - {checkin} - {checkout} - started")

    # NOTE(review): the loop runs pages - 1 times - confirm this matches
    # what get_page_count returns.
    for page in range(pages - 1):
        # Up to two download attempts for the current page.
        html = get_html(url)
        if not html:
            time.sleep(1)
            print(
                f"HTML for {page + 1}/{pages} doesn't returned, requesting again"
            )
            html = get_html(url)
            if not html:
                print(f"HTML for {page + 1}/{pages}doesn't returned at all")
                return False

        try:
            get_hotel_information(html, city, checkin, checkout)
            url = get_next_page_href(html)
        except Exception as e:
            print(e)
            print(f"Page {page + 1}/{pages} crashed, trying again")
            try:
                time.sleep(1)
                print(f"Parsing page {page + 1}/{pages} again")
                html = get_html(url)
                get_hotel_information(html, city, checkin, checkout)
                url = get_next_page_href(html)
            except Exception as e:
                print(e)
                print(f"Page {page + 1}/{pages} crashed, second TIME")
                # url is left unchanged, so the next iteration requests
                # the same page again.
                continue
        time.sleep(3)

    # .first() returns None for an unknown city; fail with False instead of
    # raising AttributeError after all the pages have been parsed.
    city_row = City.query.filter(
        or_(City.ru_name == city.title(),
            City.eng_name == city.title())).first()
    if city_row is None:
        print(f"City {city} not found in db, averages not saved")
        return False

    # One lookup tells us whether the weekly row exists and gives us the
    # row to update.
    avg_row = AvgPriceReviews.query.filter(
        AvgPriceReviews.city_id == city_row.id,
        AvgPriceReviews.week_number == week_number,
        AvgPriceReviews.year == year).first()

    # Compute each aggregate once and reuse it for the derived day price.
    avg_week_price = get_avg_price(city_row.id, week_number, year)
    avg_reviews = get_avg_reviews(city_row.id, week_number, year)
    avg_day_price = int(avg_week_price / 7)

    if avg_row is None:
        db.session.add(
            AvgPriceReviews(
                city=city_row,
                avg_reviews=avg_reviews,
                avg_week_price=avg_week_price,
                avg_day_price=avg_day_price,
                parsing_date=parsing_date,
                week_number=week_number,
                year=year))
    else:
        avg_row.avg_week_price = avg_week_price
        avg_row.avg_reviews = avg_reviews
        avg_row.avg_day_price = avg_day_price
        avg_row.parsing_date = parsing_date
    db.session.commit()
    return True