def get_trending_headlines(url):
    """Scrape the trending headlines from the page at *url*.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``.  Blocks marked with a ``span.video_icon_ss`` are removed
    first, then every ``<a>`` inside ``div#left`` -> ``div.flex-box`` is
    passed through ``get_headline_details`` and the results are handed to
    ``remove_duplicate_entries`` with the ``"link"`` key.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``remove_duplicate_entries`` returns for the collected
        headlines, or ``None`` when the response status is not 200 or the
        expected container is not present in the page.
    """
    response = requests.get(url)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    # Drop the grandparent block of every video marker.  A marker may sit
    # inside a block that an earlier iteration already decomposed (or have
    # no grandparent at all); in that case there is nothing left to remove,
    # so skip it instead of dereferencing None.
    for tag in soup.find_all("span", {"class": "video_icon_ss"}):
        parent = tag.parent
        grandparent = parent.parent if parent is not None else None
        if grandparent is not None:
            grandparent.decompose()

    # find() returns None when nothing matches, so walk the container chain
    # step by step rather than chaining attribute access.
    left_column = soup.find("div", id="left")
    if left_column is None:
        return None
    flex_box = left_column.find("div", {"class": "flex-box"})
    if flex_box is None:
        return None

    a_tags = flex_box.find_all("a")
    headlines = remove_duplicate_entries(map(get_headline_details, a_tags), "link")
    return headlines
def get_trending_headlines(url):
    """Scrape the trending headlines from the page at *url*.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``.  The ``div.opinion_opt`` block is removed when present,
    then every ``a.item-title`` inside ``div.hmpage_lhs`` is passed through
    ``get_headline_details`` and the results are handed to
    ``remove_duplicate_entries`` with the ``"link"`` key.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``remove_duplicate_entries`` returns for the collected
        headlines, or ``None`` when the response status is not 200 or the
        expected container is not present in the page.
    """
    response = requests.get(url)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    # Removing the opinion block is best-effort: find() returns None when the
    # block is absent, and that must not abort the whole scrape.
    opinion_block = soup.find("div", {"class": "opinion_opt"})
    if opinion_block is not None:
        opinion_block.decompose()

    container = soup.find("div", {"class": "hmpage_lhs"})
    if container is None:
        return None

    # Some anchor tags in div[class="lhs_col_two"] are not parsed by the
    # following lookup.
    a_tags = container.find_all("a", {"class": "item-title"})
    headlines = remove_duplicate_entries(map(get_headline_details, a_tags), "link")
    return headlines
def get_trending_headlines(url):
    """Scrape the trending headlines from the page at *url*.

    The page is fetched with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``.  A few unwanted ``div`` blocks are removed when present,
    then every ``<a>`` inside ``div.news-area.newtop-block.mb-5.mt-10`` is
    passed through ``get_headline_details`` and the results are handed to
    ``remove_duplicate_entries`` with the ``"link"`` and ``"title"`` keys.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever ``remove_duplicate_entries`` returns for the collected
        headlines, or ``None`` when the response status is not 200 or the
        expected container is not present in the page.
    """
    response = requests.get(url)
    if response.status_code != 200:
        return None

    soup = BeautifulSoup(response.text, "html.parser")

    # Blocks to strip before collecting links, removed in this order.
    # "top-thumb mt-20" is presumably the sponsored-content block; the
    # original author was not sure it is always present, so every removal
    # is best-effort (find() returns None when nothing matches).
    unwanted_classes = (
        "latestnews-left",
        "advertisement-250",
        "top-thumb mt-20",
    )
    for css_class in unwanted_classes:
        unwanted = soup.find("div", {"class": css_class})
        if unwanted is not None:
            unwanted.decompose()

    container = soup.find("div", {"class": "news-area newtop-block mb-5 mt-10"})
    if container is None:
        return None

    a_tags = container.find_all("a")
    headlines = remove_duplicate_entries(
        map(get_headline_details, a_tags), "link", "title"
    )
    return headlines