def __gatherWebMD(forumDetails):
    """Scrape the user reviews and ratings from a WebMD page.

    WebMD shows three kinds of rating for every review. To keep things simple
    only one of them is read: the element with class
    ``catRatings lastEl clearfix`` (the satisfaction rating, according to the
    original author's note), which seemed to be the most general one.

    NOTE(review): a second definition of ``__gatherWebMD`` appears later in
    this file and rebinds the name, so that later definition is the one
    callers actually get.

    Args:
        forumDetails: Indexable pair. Element 0 is the URL to fetch and
            element 1 is the treatment, which is handed to ``forum`` as-is.

    Returns:
        The result of ``forum('Webmd.com', url, 5, posts, treatment)`` where
        ``posts`` holds one ``post`` per review that had both non-empty text
        and at least one number in its rating element.

    Exceptions raised by ``requests.get`` are not caught here.
    """
    url = forumDetails[0]
    treatment = forumDetails[1]
    forumName = 'Webmd.com'
    maxRating = 5

    results = requests.get(url)
    soup = BeautifulSoup(results.content, 'html.parser')
    rawPosts = soup.find_all(name="div", attrs={'class': 'userPost'})

    # Loop-invariant, so compiled once instead of once per post.
    commentIdPattern = re.compile('comFull.*')

    posts = []
    for rawPost in rawPosts:
        try:
            review = rawPost.find(name="p", attrs={
                'id': commentIdPattern
            }).get_text()
            rating = rawPost.find(name="div", attrs={
                'class': 'catRatings lastEl clearfix'
            }).get_text()
        except AttributeError:
            # find() returned None because the element is missing; skip the
            # post. Catching only AttributeError lets KeyboardInterrupt and
            # SystemExit propagate, unlike a bare except.
            logger.error(
                " Error[__gatherWebMD(url)]= Threw an error whilst looking for a review or rating"
            )
            continue

        # Removing special characters and the page's own labels here
        review = messageCleaner.removeSpecialCharacter(review)
        review = review.replace("Comment:", "")
        review = review.replace("Hide Full Comment", "")

        # Raw string avoids the invalid "\d" escape-sequence warning.
        rating = re.findall(r'\d+', rating)

        if review and rating:
            # The first number found in the rating element is the one used.
            forumPost = post(review, __scaleRatings(rating[0], maxRating), url)
            posts.append(forumPost)

    webMD = forum(forumName, url, maxRating, posts, treatment)
    return webMD
def __gatherDrugsCom(forumDetails):
    """Scrape the user reviews and ratings from a Drugs.com page.

    Every ``div`` with class ``boxList`` is treated as one post. A post that
    does not provide both a review and a rating is dismissed because it will
    not be useful during the learning phase.

    NOTE(review): a second definition of ``__gatherDrugsCom`` appears later
    in this file and rebinds the name, so that later definition is the one
    callers actually get.

    Args:
        forumDetails: Indexable pair. Element 0 is the URL to fetch and
            element 1 is the treatment, which is handed to ``forum`` as-is.

    Returns:
        The result of ``forum('Drugs.com', url, 10, posts, treatment)`` where
        ``posts`` holds one ``post`` per box that yielded a non-empty review
        and a non-empty rating text.

    Exceptions raised by ``requests.get`` are not caught here.
    """
    url = forumDetails[0]
    treatment = forumDetails[1]
    forumName = 'Drugs.com'
    maxRating = 10

    results = requests.get(url)
    soup = BeautifulSoup(results.content, 'html.parser')
    rawPosts = soup.find_all(name="div", attrs={'class': 'boxList'})

    posts = []
    for rawPost in rawPosts:
        try:
            review = rawPost.find(name="div", attrs={'class': 'user-comment'})
            review = review.find(name="span").get_text()
            rating = rawPost.find(name="div", attrs={
                'class': 'rating-score'
            }).get_text()
        except AttributeError:
            # One of the find() calls returned None because the element is
            # missing; skip the post. Catching only AttributeError lets
            # KeyboardInterrupt and SystemExit propagate, unlike a bare except.
            logger.error(
                " Error[__gatherDrugsCom(url)] = Threw an error whilst looking for a review or rating"
            )
            continue

        # Removing special characters here
        review = messageCleaner.removeSpecialCharacter(review)

        if review and rating:
            # rating is passed on as the raw element text.
            forumPost = post(review, __scaleRatings(rating, maxRating), url)
            posts.append(forumPost)

    drugsComForum = forum(forumName, url, maxRating, posts, treatment)
    return drugsComForum
def __gatherWebMD(forumDetails):
    """Scrape the user reviews and ratings from a WebMD page.

    WebMD shows three kinds of rating for every review. To keep things simple
    only one of them is read: the element with class
    ``catRatings lastEl clearfix`` (the satisfaction rating, according to the
    original author's note), which seemed to be the most general one.

    NOTE(review): ``__gatherWebMD`` is also defined earlier in this file.
    Being the later definition, this is the one bound to the name once the
    module has finished loading.

    Args:
        forumDetails: Indexable pair. Element 0 is the URL to fetch and
            element 1 is the treatment, which is handed to ``forum`` as-is.

    Returns:
        The result of ``forum('Webmd.com', url, 5, posts, treatment)`` where
        ``posts`` holds one ``post`` per review that had both non-empty text
        and at least one number in its rating element.

    Exceptions raised by ``requests.get`` are not caught here.
    """
    url = forumDetails[0]
    treatment = forumDetails[1]
    forumName = 'Webmd.com'
    maxRating = 5

    results = requests.get(url)
    soup = BeautifulSoup(results.content, 'html.parser')
    rawPosts = soup.find_all(name="div", attrs={'class': 'userPost'})

    # Loop-invariant, so compiled once instead of once per post.
    commentIdPattern = re.compile('comFull.*')

    posts = []
    for rawPost in rawPosts:
        try:
            review = rawPost.find(
                name="p", attrs={'id': commentIdPattern}).get_text()
            rating = rawPost.find(
                name="div",
                attrs={'class': 'catRatings lastEl clearfix'}).get_text()
        except AttributeError:
            # find() returned None because the element is missing; skip the
            # post. Catching only AttributeError lets KeyboardInterrupt and
            # SystemExit propagate, unlike a bare except.
            logger.error(" Error[__gatherWebMD(url)]= Threw an error whilst looking for a review or rating")
            continue

        # Removing special characters and the page's own labels here
        review = messageCleaner.removeSpecialCharacter(review)
        review = review.replace("Comment:", "")
        review = review.replace("Hide Full Comment", "")

        # Raw string avoids the invalid "\d" escape-sequence warning.
        rating = re.findall(r'\d+', rating)

        if review and rating:
            # The first number found in the rating element is the one used.
            forumPost = post(review, __scaleRatings(rating[0], maxRating), url)
            posts.append(forumPost)

    webMD = forum(forumName, url, maxRating, posts, treatment)
    return webMD
def __gatherDrugsCom(forumDetails):
    """Scrape the user reviews and ratings from a Drugs.com page.

    Every ``div`` with class ``boxList`` is treated as one post. A post that
    does not provide both a review and a rating is dismissed because it will
    not be useful during the learning phase.

    NOTE(review): ``__gatherDrugsCom`` is also defined earlier in this file.
    Being the later definition, this is the one bound to the name once the
    module has finished loading.

    Args:
        forumDetails: Indexable pair. Element 0 is the URL to fetch and
            element 1 is the treatment, which is handed to ``forum`` as-is.

    Returns:
        The result of ``forum('Drugs.com', url, 10, posts, treatment)`` where
        ``posts`` holds one ``post`` per box that yielded a non-empty review
        and a non-empty rating text.

    Exceptions raised by ``requests.get`` are not caught here.
    """
    url = forumDetails[0]
    treatment = forumDetails[1]
    forumName = 'Drugs.com'
    maxRating = 10

    results = requests.get(url)
    soup = BeautifulSoup(results.content, 'html.parser')
    rawPosts = soup.find_all(name="div", attrs={'class': 'boxList'})

    posts = []
    for rawPost in rawPosts:
        try:
            review = rawPost.find(name="div", attrs={'class': 'user-comment'})
            review = review.find(name="span").get_text()
            rating = rawPost.find(
                name="div", attrs={'class': 'rating-score'}).get_text()
        except AttributeError:
            # One of the find() calls returned None because the element is
            # missing; skip the post. Catching only AttributeError lets
            # KeyboardInterrupt and SystemExit propagate, unlike a bare except.
            logger.error(" Error[__gatherDrugsCom(url)] = Threw an error whilst looking for a review or rating")
            continue

        # Removing special characters here
        review = messageCleaner.removeSpecialCharacter(review)

        if review and rating:
            # rating is passed on as the raw element text.
            forumPost = post(review, __scaleRatings(rating, maxRating), url)
            posts.append(forumPost)

    drugsComForum = forum(forumName, url, maxRating, posts, treatment)
    return drugsComForum