Ejemplo n.º 1
0
    def __get_youtube_id_fallback(self, track, cancellable):
        """
            Get youtube id (fallback)
            Scrapes the YouTube search results page and picks the
            best-scoring "/watch?v=" link.
            @param track as Track
            @param cancellable as Gio.Cancellable
            @return youtube id as str or None on failure
        """
        try:
            # The PyPI package is named "beautifulsoup4" but the importable
            # module is "bs4" — "from beautifulsoup4 import ..." always fails
            from bs4 import BeautifulSoup
        except ImportError:
            print("$ sudo pip3 install beautifulsoup4")
            return None
        try:
            unescaped = "%s %s" % (track.artists[0], track.name)
            search = GLib.uri_escape_string(unescaped.replace(" ", "+"), None,
                                            True)
            uri = "https://www.youtube.com/results?search_query=%s" % search
            (status,
             data) = App().task_helper.load_uri_content_sync(uri, cancellable)
            if not status:
                return None

            html = data.decode("utf-8")
            soup = BeautifulSoup(html, "html.parser")
            # Track only the best (lowest) score and its id; on ties the
            # first result encountered wins (strict "<" comparison).
            best_score = self.__BAD_SCORE
            best_id = None
            for link in soup.findAll("a"):
                href = link.get("href")
                title = link.get("title")
                if href is None or title is None:
                    continue
                if not href.startswith("/watch?v="):
                    continue
                yid = href.replace("/watch?v=", "")
                score = self.__get_youtube_score(title, track.name,
                                                 track.artists[0],
                                                 track.album.name)
                if score < best_score:
                    best_score = score
                    best_id = yid
            # None when nothing beat __BAD_SCORE
            return best_id
        except Exception as e:
            Logger.warning("YouTubeHelper::__get_youtube_id_fallback(): %s", e)
            self.__fallback = True
        return None
Ejemplo n.º 2
0
from selenium import webdriver
# BeautifulSoup installs as "beautifulsoup4" but imports as "bs4"
from bs4 import BeautifulSoup
import pandas as pd

# Scrape product name, price and rating for each gaming laptop listed
# on the Flipkart category page.
driver = webdriver.Chrome("/usr/lib/chromium-browser/chromedriver")

products = []
prices = []
ratings = []
driver.get(
    "https://www.flipkart.com/gaming-laptops-store?otracker=nmenu_sub_Electronics_0_Gaming%20Laptops&otracker=nmenu_sub_Electronics_0_Gaming%20Laptops"
)

content = driver.page_source
# Name the parser explicitly so behavior doesn't depend on what's installed
soup = BeautifulSoup(content, "html.parser")

for a in soup.findAll('a', href=True, attrs={'class': '_2cLu-l'}):
    name = a.find('div', attrs={'class': '_2cLu-l'})
    price = a.find('div', attrs={'class': '_1vC4OE'})
    # Use a scalar "rating" — the original assigned to "ratings", which
    # clobbered the result list, then referenced an undefined "rating"
    rating = a.find('div', attrs={'class': 'hGSR34'})
    products.append(name.text)
    # Append to the "prices" list, not to the BeautifulSoup Tag "price"
    prices.append(price.text)
    ratings.append(rating.text)
Ejemplo n.º 3
0
# Times of India: collect the <h2> headlines, dropping the trailing
# footer entries and anything too short to be a real headline.
toi_headings = toi_soup.find_all('h2')

toi_headings = toi_headings[0:-13]  # last 13 <h2> tags are footer links

toi_news = [heading.text
            for heading in toi_headings
            if len(heading.text) >= 25]

#Getting news from Hindustan times

ht_r = requests.get("https://www.hindustantimes.com/india-news/")
ht_soup = BeautifulSoup(ht_r.content, 'html5lib')
# First two matches are not headlines, so skip them
ht_headings = ht_soup.findAll("div", {"class": "headingfour"})[2:]
ht_news = [heading.text
           for heading in ht_headings
           if len(heading.text) >= 25]

#getting news From The Hindu

main_url = " https://newsapi.org/v1/articles?source=the-hindu&sortBy=top&apiKey=98cb343e477940f181f5b38bdb9e3f9d"
# fetching data in json format
open_th_page = requests.get(main_url).json()

# getting all articles in a string article