Python EverydayHealthScraper.scrape 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: medinify.scrapers

메소드/함수: scrape

hotexamples.com에서의 예제들: 9

Python EverydayHealthScraper.scrape - 9개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 medinify.scrapers.EverydayHealthScraper.scrape에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

EverydayHealthScraper(24)

scrape(9)

get_url(5)

scrape_page(4)

get_drug_urls(1)

max_pages(1)

예제 #1

파일 보기

def test_scrape_no_reviews():
    """Scraping a drug page that has no reviews must leave ``reviews`` empty."""
    reviewless_page = 'https://www.everydayhealth.com/drugs/triprolidine/reviews'
    eh_scraper = EverydayHealthScraper()
    eh_scraper.scrape(reviewless_page)
    collected = eh_scraper.reviews
    assert len(collected) == 0

예제 #2

파일 보기

def test_scrape_empty_reviews():
    """
    Scraping the same page twice with one scraper must yield the same
    review count both times, i.e. the second call must not accumulate on
    top of the reviews collected by the first call.
    """
    page = 'https://www.everydayhealth.com/drugs/phenadoz/reviews'
    scraper = EverydayHealthScraper()
    counts = []
    # Run the identical scrape twice and record how many reviews are held
    # after each run.
    for _ in range(2):
        scraper.scrape(page)
        counts.append(len(scraper.reviews))
    first_count, second_count = counts
    assert first_count == second_count

예제 #3

파일 보기

def test_scrape_with_parameters():
    """
    With ``collect_urls=True`` the scraper must gather more than 20 reviews
    and each review must carry five fields, one of which is ``'url'``.
    """
    page = 'https://www.everydayhealth.com/drugs/gabapentin/reviews'
    scraper = EverydayHealthScraper(collect_urls=True)
    scraper.scrape(page)
    review_total = len(scraper.reviews)
    assert review_total > 20
    # Inspect the field names of the first review only.
    first_review = scraper.reviews[0]
    field_names = list(first_review.keys())
    assert len(field_names) == 5
    assert 'url' in field_names

예제 #4

파일 보기

def test_scrape_correct_review_data():
    """
    The last entry of the scraped review list (described by the original
    test as the drug's oldest review) must match the known comment
    prefix/suffix, rating and date.
    """
    page = 'https://www.everydayhealth.com/drugs/ciclopirox-topical/reviews'
    scraper = EverydayHealthScraper(collect_urls=True)
    scraper.scrape(page)
    oldest = scraper.reviews[-1]
    comment = oldest['comment']
    # Only the first and last ten characters of the comment are pinned.
    assert comment[:10] == 'After OVER'
    assert comment[-10:] == 'inally hav'
    assert oldest['rating'] == 5
    assert oldest['date'] == '5/22/2015 4:18:19 AM'

예제 #5

파일 보기

def test_scrape_default_parameter():
    """
    A default-constructed scraper must collect more than 20 reviews (the
    original test treats this as evidence that several pages were scraped)
    and each review must hold exactly the four fields ``comment``,
    ``rating``, ``date`` and ``drug``.
    """
    page = 'https://www.everydayhealth.com/drugs/gabapentin/reviews'
    scraper = EverydayHealthScraper()
    scraper.scrape(page)
    review_total = len(scraper.reviews)
    assert review_total > 20
    field_names = list(scraper.reviews[0].keys())
    assert len(field_names) == 4
    for expected_field in ('comment', 'rating', 'date', 'drug'):
        assert expected_field in field_names

예제 #6

파일 보기

def test_scrape_invalid_url_no_title():
    """
    ``scrape`` must return 0 for an invalid URL.

    Per the original test description the page lacks a title and the
    scraper is expected to handle the resulting AttributeError itself;
    only the return value is checked here.
    """
    invalid_page = 'https://www.everydayhealth.com/drugs/'
    scraper = EverydayHealthScraper()
    outcome = scraper.scrape(invalid_page)
    assert outcome == 0

예제 #7

파일 보기

def test_everydayhealth_scrape():
    """Check that ``scrape`` returns more than five reviews with comment and rating fields."""
    scraper = EverydayHealthScraper()
    reviews = scraper.scrape(
        'https://www.everydayhealth.com/drugs/citalopram/reviews')
    assert len(reviews) > 5
    # Only the final review's field names are inspected.
    last_review_fields = list(reviews[-1].keys())
    for expected_field in ('comment', 'rating'):
        assert expected_field in last_review_fields

예제 #8

파일 보기

def test_scrape_assert_title_error():
    """
    ``scrape`` must return 0 for an invalid URL.

    Per the original test description the page has a title that does not
    contain the phrase 'Drug Reviews', and the scraper is expected to
    handle the resulting AssertionError itself; only the return value is
    checked here.
    """
    wrong_title_page = 'https://www.everydayhealth.com/drugs/'
    scraper = EverydayHealthScraper()
    outcome = scraper.scrape(wrong_title_page)
    assert outcome == 0

예제 #9

파일 보기

파일: reviewsScrapingDrug.py 프로젝트: dipperalbel/webdm-everydayhealth-reviews-scraper

def main():
    """
    Scrape reviews for every drug in ``input_list`` from WebMD and from
    Everyday Health, and write one JSON summary per site.

    For each drug, the review page URL is resolved with ``get_url``, the
    page is scraped, and rating statistics plus the raw reviews are stored
    under the drug's name. A drug that fails at any step is still listed
    (with whatever fields were filled in before the failure) and its name
    is appended to ``webmd_names_errors`` / ``everydayhealth_names_errors``.

    Reads the module-level ``input_list`` and mutates the module-level
    error lists. Writes ``webmdresult.json`` and ``everydayhealth.json``
    into the current working directory.
    """
    # Original author's note: DrugsScraper() does not work,
    # DrugRatingzScraper() does not work; or EverydayHealthScraper().
    webmd_scraper = WebMDScraper()
    webmd_summary = {
        "website": "webmd.com",
        "ratingSystem": "stars",
        "itemsNamesAggregration": input_list,
    }
    webmd_entries = []
    for drug_name in input_list:
        entry = {"name": drug_name}
        try:
            url = webmd_scraper.get_url(drug_name)  # or any other drug name
            webmd_scraper.scrape(url)
            reviews_frame = pd.DataFrame.from_dict(webmd_scraper.reviews)
            # The code expands each review's "rating" into columns
            # ("effectiveness", "ease of use", "satisfaction"); build that
            # frame once instead of once per statistic.
            ratings = pd.DataFrame.from_records(reviews_frame["rating"])
            entry["averageEffectiveness"] = round(
                ratings["effectiveness"].mean(), 1)
            entry["averageEaseOfUse"] = round(
                ratings["ease of use"].mean(), 1)
            entry["averageSatisfaction"] = round(
                ratings["satisfaction"].mean(), 1)
            # Min/max are taken from the "satisfaction" score only.
            entry["minRating"] = round(ratings["satisfaction"].min(), 1)
            entry["maxRating"] = round(ratings["satisfaction"].max(), 1)
            # Shallow copy so a later scrape() that reuses the scraper's
            # list cannot alter this entry.
            entry["reviews"] = list(webmd_scraper.reviews)
        except Exception:
            # Best effort: record the failure and continue with the next
            # drug. KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"Could not get {drug_name} from webmd website")
            webmd_names_errors.append(drug_name)
        webmd_entries.append(entry)
    webmd_summary["aggregrateReviews"] = webmd_entries

    with open("webmdresult.json", "w") as f:
        json.dump(webmd_summary, f, indent=4)

    everydayhealth_scraper = EverydayHealthScraper()
    everydayhealth_summary = {
        "website": "everydayhealth.com",
        "ratingSystem": "stars",
        "itemsNamesAggregration": input_list,
    }
    everydayhealth_entries = []
    for drug_name in input_list:
        entry = {"name": drug_name}
        try:
            # Look up the current drug; the previous hard-coded "Adderall"
            # stored Adderall's reviews under every drug name.
            url = everydayhealth_scraper.get_url(drug_name)
            print(url)
            everydayhealth_scraper.scrape(url)
            reviews_frame = pd.DataFrame.from_dict(
                everydayhealth_scraper.reviews)
            # Here "rating" is aggregated directly as a single column.
            rating_column = reviews_frame["rating"]
            entry["averageRating"] = round(rating_column.mean(), 1)
            entry["minRating"] = round(rating_column.min(), 1)
            entry["maxRating"] = round(rating_column.max(), 1)
            entry["reviews"] = list(everydayhealth_scraper.reviews)
        except Exception:
            print(f"Could not get {drug_name}"
                  " from everydayhealthscraper website ")
            everydayhealth_names_errors.append(drug_name)
        everydayhealth_entries.append(entry)

    everydayhealth_summary["aggregrateReviews"] = everydayhealth_entries

    with open("everydayhealth.json", "w") as f:
        json.dump(everydayhealth_summary, f, indent=4)

    if webmd_names_errors:
        print(f"I could not get from webmd {webmd_names_errors}")

    if everydayhealth_names_errors:
        print(f"I could not get from everydayhealth "
              f"{everydayhealth_names_errors}")