def test_short_drug_name():
    """Verify that get_url declines to search for drug names shorter than 4 characters."""
    ehs = EverydayHealthScraper()
    result = ehs.get_url('ACE')
    assert not result
def test_get_url_real_drug_name():
    """Verify that get_url builds the expected review URL for a plain drug name ('dupixent')."""
    ehs = EverydayHealthScraper()
    result = ehs.get_url('dupixent')
    expected = 'https://www.everydayhealth.com/drugs/dupixent/reviews'
    assert result == expected
def test_drug_name_with_space():
    """Verify that get_url handles a drug name containing a space (hyphenated in the URL)."""
    ehs = EverydayHealthScraper()
    result = ehs.get_url('Mucinex Allergy')
    expected = 'https://www.everydayhealth.com/drugs/mucinex-allergy/reviews'
    assert result == expected
def test_url_fake_drug_name():
    """Verify that get_url returns a falsy value for a drug name with no review page."""
    ehs = EverydayHealthScraper()
    result = ehs.get_url('garbage')
    assert not result
def main():
    """Scrape reviews for every drug in the module-level ``input_list`` from two
    sites (webmd.com and everydayhealth.com), aggregate per-drug rating stats,
    and write the results to ``webmdresult.json`` and ``everydayhealth.json``.

    Drugs that fail to scrape are recorded in the module-level error lists
    ``webmd_names_errors`` / ``everydayhealth_names_errors`` and reported at
    the end; a failed drug still appears in the output with only its name.
    """
    # NOTE(review): per the original author, DrugsScraper() and
    # DrugRatingzScraper() did not work here; WebMDScraper and
    # EverydayHealthScraper are the working alternatives.
    scraper = WebMDScraper()
    json_aggregrationReviews = {"website": "webmd.com"}
    json_aggregrationReviews["ratingSystem"] = "stars"
    json_aggregrationReviews["itemsNamesAggregration"] = input_list
    reviewsAggregrate = []
    for drug_name in input_list:
        json_reviews = {"name": drug_name}
        try:
            url = scraper.get_url(drug_name)
            scraper.scrape(url)
            dataframe_reviews = pd.DataFrame.from_dict(scraper.reviews)
            # Expand the per-review rating dicts once instead of five times.
            ratings = pd.DataFrame.from_records(dataframe_reviews["rating"])
            json_reviews["averageEffectiveness"] = round(
                ratings["effectiveness"].mean(), 1)
            json_reviews["averageEaseOfUse"] = round(
                ratings["ease of use"].mean(), 1)
            json_reviews["averageSatisfaction"] = round(
                ratings["satisfaction"].mean(), 1)
            # min/max are taken over the satisfaction column, matching the
            # original behavior.
            json_reviews["minRating"] = round(ratings["satisfaction"].min(), 1)
            json_reviews["maxRating"] = round(ratings["satisfaction"].max(), 1)
            json_reviews["reviews"] = scraper.reviews
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # are no longer swallowed; scraping is best-effort per drug.
            print("Could not get " + drug_name + " from webmd website")
            webmd_names_errors.append(drug_name)
        reviewsAggregrate.append(json_reviews)
    json_aggregrationReviews["aggregrateReviews"] = reviewsAggregrate
    with open("webmdresult.json", "w") as f:
        f.write(json.dumps(json_aggregrationReviews, indent=4))

    scraper2 = EverydayHealthScraper()
    json_aggregrationReviews = {"website": "everydayhealth.com"}
    json_aggregrationReviews["ratingSystem"] = "stars"
    json_aggregrationReviews["itemsNamesAggregration"] = input_list
    reviewsAggregrate = []
    for drug_name in input_list:
        json_reviews = {"name": drug_name}
        try:
            # BUG FIX: this was hard-coded to get_url("Adderall"), so every
            # entry scraped Adderall's reviews regardless of the drug name.
            url = scraper2.get_url(drug_name)
            print(url)
            scraper2.scrape(url)
            dataframe_reviews = pd.DataFrame.from_dict(scraper2.reviews)
            # EverydayHealth exposes a single numeric rating per review.
            json_reviews["averageRating"] = round(
                dataframe_reviews["rating"].mean(), 1)
            json_reviews["minRating"] = round(
                dataframe_reviews["rating"].min(), 1)
            json_reviews["maxRating"] = round(
                dataframe_reviews["rating"].max(), 1)
            json_reviews["reviews"] = scraper2.reviews
        except Exception:
            print("Could not get " + drug_name +
                  " from everydayhealthscraper website ")
            everydayhealth_names_errors.append(drug_name)
        reviewsAggregrate.append(json_reviews)
    json_aggregrationReviews["aggregrateReviews"] = reviewsAggregrate
    with open("everydayhealth.json", "w") as f:
        f.write(json.dumps(json_aggregrationReviews, indent=4))

    if (len(webmd_names_errors) != 0):
        print("I could not get from webmd " + str(webmd_names_errors))
    if (len(everydayhealth_names_errors) != 0):
        print("I could not get from everydayhealth " +
              str(everydayhealth_names_errors))