# Imports assumed for these examples (medinify-style project layout).
import json

import pandas as pd

from medinify.scrapers import WebMDScraper, EverydayHealthScraper


def test_name_with_numbers_and_spaces():
    """
    Tests get_url function on drug name with numbers and spaces
    """
    scraper = WebMDScraper()
    url = scraper.get_url('7-Keto DHEA powder')
    assert url == 'https://www.webmd.com/drugs/drugreview-149048-7-Keto-DHEA.aspx?drugid=149048&drugname=7-Keto-DHEA'


def test_short_drug_name():
    """
    Tests that the get_url function does not search for drug names shorter than 4 characters
    """
    scraper = WebMDScraper()
    url = scraper.get_url('ACE')
    assert not url


def test_url_fake_drug_name():
    """
    Tests that the get_url function returns 'None' for a drug name that does not have a review page
    """
    scraper = WebMDScraper()
    url = scraper.get_url('garbage')
    assert not url


def test_get_url_real_drug_name():
    """
    Tests that the get_url function returns the correct url for a standard drug name ('actos')
    """
    scraper = WebMDScraper()
    url = scraper.get_url('actos')
    assert url == 'https://www.webmd.com/drugs/drugreview-17410-Actos-oral.aspx?drugid=17410&drugname=Actos-oral'


def test_name_with_numbers():
    """
    Tests get_url function on drug name with numbers
    """
    scraper = WebMDScraper()
    url = scraper.get_url('12.5CPD-1DCPM-30PSE')
    assert url == 'https://www.webmd.com/drugs/drugreview-150612-dexchlorphen-p-phed-' \
                  'chlophedianol-oral.aspx?drugid=150612&drugname=dexchlorphen-p-phed-chlophedianol-oral'


def test_drug_name_with_space():
    """
    Tests that the get_url function returns the correct url for a drug name with a space in it
    """
    scraper = WebMDScraper()
    url = scraper.get_url('Methotrexate Vial')
    assert url == 'https://www.webmd.com/drugs/drugreview-5659-' \
                  'methotrexate-sodium-injection.aspx?drugid=5659&drugname=methotrexate-sodium-injection'
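

# The URL cases above could also be expressed as one parametrized test. This is
# only a sketch (it assumes pytest is available) and mirrors the assertions above.
import pytest


@pytest.mark.parametrize("name,expect_url", [
    ("ACE", False),      # shorter than 4 characters, so no search is made
    ("garbage", False),  # no review page exists for this name
    ("actos", True),     # standard drug name with a review page
])
def test_get_url_parametrized(name, expect_url):
    """
    Sketch only: consolidates the get_url cases above into a single parametrized test.
    """
    scraper = WebMDScraper()
    assert bool(scraper.get_url(name)) == expect_url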


def main():
    scraper = WebMDScraper()  # or DrugsScraper(), DrugRatingsScraper(), or EverydayHealthScraper()
    url = scraper.get_url('Galzin')  # or any other drug name
    scraper.scrape(url)
    print('Scraped %d reviews.' % len(scraper.reviews))
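

# A small optional helper (a sketch, not part of the original example): the scraped
# reviews are a list of dicts, so they can be persisted with pandas for later analysis.
# The helper name and output path are hypothetical.
def save_reviews_to_csv(scraper, path='reviews.csv'):
    # Writes one row per scraped review.
    pd.DataFrame.from_dict(scraper.reviews).to_csv(path, index=False)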


# Assumed module-level inputs for the aggregation example below (normally these
# would be populated elsewhere): the drug names to scrape and the per-site error lists.
input_list = []
webmd_names_errors = []
everydayhealth_names_errors = []


def main():
    # DrugsScraper() does not work, DrugRatingsScraper() does not work; EverydayHealthScraper() is used further down.
    scraper = WebMDScraper()
    url = ""
    json_aggregrationReviews = {"website": "webmd.com"}
    json_aggregrationReviews["ratingSystem"] = "stars"
    json_aggregrationReviews["itemsNamesAggregration"] = input_list
    reviewsAggregrate = []
    # Scrape WebMD ratings for each drug and aggregate the per-category averages.
    for drug_name in input_list:
        json_reviews = {"name": drug_name}
        try:
            url = scraper.get_url(drug_name)
            scraper.scrape(url)
            dataframe_reviews = pd.DataFrame.from_dict(scraper.reviews)
            # Each WebMD rating is a dict of 'effectiveness', 'ease of use', and 'satisfaction'.
            ratings = pd.DataFrame.from_records(dataframe_reviews["rating"])
            json_reviews["averageEffectiveness"] = round(ratings["effectiveness"].mean(), 1)
            json_reviews["averageEaseOfUse"] = round(ratings["ease of use"].mean(), 1)
            json_reviews["averageSatisfaction"] = round(ratings["satisfaction"].mean(), 1)
            json_reviews["minRating"] = round(ratings["satisfaction"].min(), 1)
            json_reviews["maxRating"] = round(ratings["satisfaction"].max(), 1)
            json_reviews["reviews"] = scraper.reviews
        except Exception:
            print("Could not get " + drug_name + " from the WebMD website")
            webmd_names_errors.append(drug_name)
        reviewsAggregrate.append(json_reviews)
    json_aggregrationReviews["aggregrateReviews"] = reviewsAggregrate

    with open("webmdresult.json", "w") as f:
        obj = json.dumps(json_aggregrationReviews, indent=4)
        f.write(obj)
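    # For reference, webmdresult.json ends up shaped roughly as follows
    # (the "..." are placeholders, not real values):
    # {
    #     "website": "webmd.com",
    #     "ratingSystem": "stars",
    #     "itemsNamesAggregration": [...],
    #     "aggregrateReviews": [
    #         {"name": ..., "averageEffectiveness": ..., "averageEaseOfUse": ...,
    #          "averageSatisfaction": ..., "minRating": ..., "maxRating": ..., "reviews": [...]}
    #     ]
    # }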

    scraper2 = EverydayHealthScraper()
    json_aggregrationReviews = {"website": "everydayhealth.com"}
    json_aggregrationReviews["ratingSystem"] = "stars"
    json_aggregrationReviews["itemsNamesAggregration"] = input_list
    reviewsAggregrate = []
    # Scrape EverydayHealth reviews for each drug; these carry a single star rating.
    for drug_name in input_list:
        json_reviews = {"name": drug_name}
        try:
            url = scraper2.get_url(drug_name)
            print(url)
            scraper2.scrape(url)
            dataframe_reviews = pd.DataFrame.from_dict(scraper2.reviews)
            json_reviews["averageRating"] = round(dataframe_reviews["rating"].mean(), 1)
            json_reviews["minRating"] = round(dataframe_reviews["rating"].min(), 1)
            json_reviews["maxRating"] = round(dataframe_reviews["rating"].max(), 1)
            json_reviews["reviews"] = scraper2.reviews
        except Exception:
            print("Could not get " + drug_name + " from the EverydayHealth website")
            everydayhealth_names_errors.append(drug_name)
        reviewsAggregrate.append(json_reviews)

    json_aggregrationReviews["aggregrateReviews"] = reviewsAggregrate

    with open("everydayhealth.json", "w") as f:
        obj = json.dumps(json_aggregrationReviews, indent=4)
        f.write(obj)

    if webmd_names_errors:
        print("Could not get the following drugs from WebMD: " + str(webmd_names_errors))

    if everydayhealth_names_errors:
        print("Could not get the following drugs from EverydayHealth: " +
              str(everydayhealth_names_errors))
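

# A minimal way to drive the aggregation example above. The drug names below are
# only illustrative (taken from the earlier examples); any list of names would work.
if __name__ == "__main__":
    input_list.extend(["actos", "Galzin"])
    main()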