def get_user_lists(self):
    """ Return all the imdb lists for this user. """
    if not self.imdb_user:
        return []

    ulist = []
    url = self.users_url.format(userid=self.imdb_user)
    result = client.request(url)
    soup = bss(result, "html.parser")
    items = soup.findAll("div", {"class": "user-list"})
    for item in items:
        list_id = item['id']
        list_name = item.find("a", {"class": "list-name"}).get_text()
        # The list URL is the same regardless of title_type.
        url = "q=imdbUserList&listId={}".format(list_id)
        ulist.append({
            'name': list_name,
            'id': list_id,
            'url': url,
            'tvdb': '0'
        })
    return ulist
def acestream_channels():
    """ Return a list of tvshows from acestream channels url """
    items = []
    r = requests.get(CHANNELS_URL)
    soup = bss(r.text, 'html.parser')
    for i in soup.find('table').findAll('tr'):
        try:
            stream_name = i.find('td').renderContents()
            if not stream_name:
                continue
        except Exception:
            continue
        try:
            ace_link = i.find('a').get('href')
        except Exception:
            continue
        # The inline style of the link encodes the stream status colour.
        try:
            style = i.find('a').get('style')
        except Exception:
            style = None
        if not style:
            color = 'grey'
        elif 'red' in style:
            color = 'red'
        elif 'green' in style:
            color = 'green'
        else:
            color = 'blue'
        items.append({'url': ace_link, 'color': color, 'desc': stream_name})
    return items
def get3015():
    # Scrape the usage counter from the HP printer's embedded web server
    # (self-signed certificate, hence verify=False).
    url = 'https://10.65.31.18/hp/device/this.LCDispatcher?nav=hp.Usage'
    r = requests.get(url, verify=False)
    content = r.content
    soup = bss(content, 'lxml')
    taa = soup.findAll('table', id='tbl-1847')[-1]
    tab = taa.findAll('div', 'hpPageText')[-1].text
    print(tab)
def run(self):
    url = 'http://' + self.ip + '/hp/device/this.LCDispatcher?nav=hp.Usage'
    r = requests.get(url, verify=False)
    content = r.content
    soup = bss(content, 'html5lib')
    taa = soup.findAll('table', 'hpTable')[-1]
    tab = taa.findAll('span', 'hpPageText')[-1].text
    print(tab)
def get_imdb_url_contents(self, url, *args):
    """ Retrieve the list of shows for the given url """
    if not url:
        return []

    # Update the page limit to our setting
    path, params = self.url_decode(url)
    params.update({'count': self.count})
    url = "{}/{}?{}".format(self.base_url, path, self.params_encode(params))
    log_utils.log("Updated URL: {}".format(url))

    results_list = []
    result = client.request(url)
    soup = bss(result, "html.parser")
    next_url = self.get_next_link(path, soup)
    for li in soup.findAll("div", {"class": "lister-item"}):
        title = li.find("h3", {"class": "lister-item-header"}).find('a').getText()
        year_raw = li.find("span", {"class": "lister-item-year"}).getText()
        try:
            year = int(re.search(r'(\d+)', year_raw).group(0))
        except Exception:
            year = 'TBD'
        try:
            rating = li.find("div", {"class": "ratings-imdb-rating"}).find("strong").get_text()
        except Exception:
            rating = '?'
        plot = li.find("p", {"class": ""}).getText().strip()
        imdb = li.find("div", {"class": "lister-item-image"}).find("img")['data-tconst']
        poster = li.find("div", {"class": "lister-item-image"}).find("img")['loadlate']
        results_list.append({
            'title': title,
            'originaltitle': title,
            'year': year,
            'rating': rating,
            'plot': plot,
            'imdb': imdb,
            'poster': poster,
            'tvdb': '0',
            'next': next_url,
        })
    return results_list
def acesearch(term):
    """ Search URL for acestreams and return list of dictionaries containing
    the name and acestream URL """
    r = requests.post(SEARCH_URL, data={'cn': term})
    soup = bss(r.text, "html.parser")
    items = []
    for i in soup.findAll('a', {'class': 'list-group-item'}):
        items.append({'url': i['href'], 'desc': i.contents[0]})
    return items
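# Minimal usage sketch for the acestream helpers above (acestream_channels and
# acesearch). It assumes CHANNELS_URL and SEARCH_URL are defined in the module;
# the search term 'sport' is only an example.
if __name__ == '__main__':
    for channel in acestream_channels():
        print(channel['color'], channel['desc'], channel['url'])
    for hit in acesearch('sport'):
        print(hit['desc'], hit['url'])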
def run(self):
    global prtnum
    try:
        url = 'https://' + self.ip + '/hp/device/this.LCDispatcher?nav=hp.Usage'
        r = requests.get(url, verify=False)
        content = r.content
        soup = bss(content, 'html5lib')
        taa = soup.findAll('table', id='tbl-1847')[-1]
        tab = taa.findAll('div', 'hpPageText')[-1].text
        with mylock:
            prtnum[self.ip] = tab
    except Exception:
        # Record a zero count for printers that cannot be reached or parsed.
        with mylock:
            prtnum[self.ip] = 0
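# Illustrative driver for the threaded page-count collector above. The Thread
# subclass name (PageCountThread) and the printer IPs are assumptions; prtnum
# and mylock are the module-level globals that run() expects.
import threading

mylock = threading.Lock()
prtnum = {}

def collect_page_counts(ips):
    # One thread per printer IP; run() stores each result in the shared dict.
    threads = [PageCountThread(ip) for ip in ips]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return prtnum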
def get_imdb_url_contents(self, url, *args):
    """ Retrieve the list of shows for the given url """
    if not url:
        return []

    results_list = []
    result = client.request(url)
    soup = bss(result, "html.parser")
    for li in soup.findAll("div", {"class": "lister-item"}):
        title = li.find("h3", {"class": "lister-item-header"}).find('a').getText()
        year_raw = li.find("span", {"class": "lister-item-year"}).getText()
        try:
            year = int(re.search(r'(\d+)', year_raw).group(0))
        except Exception:
            year = 'TBD'
        try:
            rating = li.find("div", {"class": "ratings-imdb-rating"}).find("strong").get_text()
        except Exception:
            rating = '?'
        plot = li.find("p", {"class": ""}).getText().strip()
        imdb = li.find("div", {"class": "lister-item-image"}).find("img")['data-tconst']
        poster = li.find("div", {"class": "lister-item-image"}).find("img")['loadlate']
        results_list.append({
            'title': title,
            'originaltitle': title,
            'year': year,
            'rating': rating,
            'plot': plot,
            'imdb': imdb,
            'poster': poster,
            'tvdb': '0',
        })
    return results_list
def kavery(self):
    base_url = 'http://kavery.org.in/placement11-12.aspx'
    head = {'User-Agent': 'Mozilla/5.0 (Linux; <Android Version>'}
    req = requests.get(url=base_url, headers=head)
    soup = bss(req.content, 'html.parser')
    lokt = soup.find('table')
    t_row = lokt.find_all('tr')
    lst = []
    for tm in t_row:
        td = tm.find_all('td')
        row = [tp.get_text() for tp in td if tp.get_text()]
        lst.append(row)
    # Print the header followed by the first 28 rows of the placement table.
    l = ['s.No', 'Name', 'Placed']
    print(l)
    st = 0
    while st <= 27:
        print(lst[st])
        st += 1
def updateQuestions(urllink):
    s = requests.get(urllink)
    soup = bss(s.text, features='html.parser')
    l = soup.findAll('p')
    for i in l:
        try:
            # Question paragraphs look like "1. text (a) ... (b) ... (c) ... (d) ...";
            # the answer follows in a separate paragraph starting with "Answer: ".
            if re.search(r'\d\. ', i.text):
                q = str(i.text)
                q = q[q.index(' ') + 1:]
                q, abcd = q.split('(a) ')
                o1, bcd = abcd.split('(b) ')
                o2, cd = bcd.split('(c) ')
                o3, o4 = cd.split('(d) ')
            if str(i.text).startswith('Answer: '):
                ans = str(i.text)
                ans = ans[ans.index(' ') + 1:]
                updateDB([q, o1, o2, o3, o4, ans, ''])
        except Exception:
            continue
import requests
import re
from mechanize import Browser
from bs4 import BeautifulSoup as bss
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# WEBMAIL_ID, WEBMAIL_PASS and the selenium `driver` instance are assumed to be
# defined elsewhere in the project.
url = "webmail.daiict.ac.in"
master = 'https://webmail.daiict.ac.in/'
r = requests.get(master)

# Log in to the Zimbra webmail with mechanize and collect the message ids.
br = Browser()
br.set_handle_robots(False)
br.open("https://webmail.daiict.ac.in/zimbra/")
br.select_form(name='loginForm')
br.form['username'] = WEBMAIL_ID
br.form['password'] = WEBMAIL_PASS
br.submit()
response = br.response().read()

pool = bss(response, 'html.parser')
messages = pool.find_all('tbody', {'id': 'mess_list_tbody'})[0]
messages = messages.findAll('a', href=True)
queue = []
for message in messages:
    ID = message['id']
    queue.append(ID)

# Click through each message in the browser session and open the Forward action.
delay = 3
wait = WebDriverWait(driver, delay)
for each in queue:
    message = wait.until(EC.element_to_be_clickable((By.ID, each)))
    message.click()
    forward = wait.until(EC.element_to_be_clickable((By.ID, 'OPFORW')))
def html2unicode(text):
    """Converts HTML entities to unicode. For example '&amp;' becomes '&'."""
    # BeautifulSoup 4 decodes entities while parsing, so get_text() already
    # returns the unescaped text (the old convertEntities argument is BS3-only).
    return bss(text, 'html.parser').get_text()
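# For plain text that only needs entity decoding, the Python 3 standard library
# gives the same result without BeautifulSoup; this is only an illustrative
# alternative, not part of the original helper.
import html

def html2unicode_stdlib(text):
    """Stdlib-only sketch: decode HTML entities in a plain-text string."""
    return html.unescape(text)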
def updateQuestions(urllink):
    s = requests.get(urllink)
    soup = bss(s.text, features='html.parser')
    l = soup.findAll('p')
    i = 0
    while i < len(l):
        try:
            # A question paragraph starts with "<number>. <text>"; options (A)-(D),
            # the answer and the explanation follow in later <p> elements.
            if re.search(r'\d\. \w+', l[i].text):
                q = str(l[i].text)
                q = q[q.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('(A) '):
                    i += 1
                o1 = str(l[i].text)
                o1 = o1[o1.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('(B) '):
                    i += 1
                o2 = str(l[i].text)
                o2 = o2[o2.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('(C) '):
                    i += 1
                o3 = str(l[i].text)
                o3 = o3[o3.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('(D) '):
                    i += 1
                o4 = str(l[i].text)
                o4 = o4[o4.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('Answer: '):
                    i += 1
                ans = str(l[i].text)
                ans = ans[ans.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('Expl'):
                    i += 1
                exp = str(l[i].text)
                exp = exp[exp.index(' ') + 1:]
                i += 1
                updateDB([q, o1, o2, o3, o4, ans, exp])
            else:
                i += 1
        except Exception:
            i += 1
import requests
from tabulate import tabulate
from bs4 import BeautifulSoup as bss

pl = []
myurl = "https://www.flipkart.com/mobiles/smartphones~type/pr?sid=tyy%2C4io&page="
for i in range(1, 5):
    s = requests.get(myurl + str(i))
    print('[+] done fetching websites')
    soup = bss(s.text, features='html.parser')
    for a in soup.findAll('a', href=True, attrs={'class': "_31qSD5"}):
        name = a.find('div', attrs={'class': '_3wU53n'}).text
        price = a.find('div', attrs={'class': '_1vC4OE _2rQ-NK'}).text
        specsList = a.find('ul', attrs={'class': 'vFw0gD'})
        specs = specsList.find('li', attrs={'class': "tVe95H"}).text
        offerS = a.find('div', attrs={'class': 'VGWI6T'})
        if offerS is None:
            offer = ' '
        else:
            offer = offerS.span.text
        # Pad 6-character offer strings with a leading zero for uniform width.
        if len(offer) == 6:
            offer = '0' + offer
        pl.append([name, price, specs, offer])

print(tabulate(sorted(pl, key=lambda x: x[0], reverse=True),
               ["Device", "Price", "Specs", "Offer"], "fancy_grid"))
from urllib.request import urlopen
from bs4 import BeautifulSoup as bss

# By default, all HTTPS connections verify the server certificate and hostname.
# Affected libraries: urllib, urllib2, http, httplib.
# Disable verification globally so the page can still be fetched.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# Boannews Base URL
base_url = 'https://www.dailysecu.com'
url = base_url + '/?mod=news&act=articleList&view_type=S&sc_code=1435901200'

# url open
f = urlopen(url)
# page read
b = f.read()

soup = bss(b, 'html.parser')
divs = soup.find_all('div', {'class': 'list-block'})
file_data = []


def getDailyData():
    num = 0
    for i in divs:
        f = {}
        title = i.find('div', {'class': 'list-titles'})
        title = title.string
        url = i.find('a')['href']
def updateQuestions(urllink):
    s = requests.get(urllink)
    soup = bss(s.text, features='html.parser')
    l = soup.findAll('p')
    fl = 0
    # exp is initialised so questions without an explanation can still be saved.
    exp = ''
    for i in l:
        if re.search(r'\d\. ', i.text):
            # A new question paragraph: flush the previous one if it is pending.
            if fl == 1:
                updateDB([q, o1, o2, o3, o4, ans, ''])
                q = o1 = o2 = o3 = o4 = ans = ''
                fl = 0
            q = str(i.text)
            q = q[q.index(' ') + 1:]
            if 'A.' in q and 'Ans. ' in q:
                # Question, options, answer and explanation are all in one paragraph.
                q, abcd = q.split('A. ')
                o1, bcd = abcd.split('B. ')
                o2, cd = bcd.split('C. ')
                o3, d = cd.split('D. ')
                o4, ans = d.split('Ans. ')
                if 'Expla' in ans:
                    ans, exp = ans.split('Explanation: ')
                updateDB([q, o1, o2, o3, o4, ans, exp])
                q = o1 = o2 = o3 = o4 = ans = exp = ''
                fl = 0
            #print('q',q)
        elif str(i.text).startswith('Ans.'):
            ans = str(i.text)
            ans = ans[ans.index(' ') + 1:]
            fl = 1
            #print('ans',ans)
        elif str(i.text).startswith('A.'):
            o1 = str(i.text)
            o1 = o1[o1.index(' ') + 1:]
            #print('o1',o1)
        elif str(i.text).startswith('B.'):
            o2 = str(i.text)
            o2 = o2[o2.index(' ') + 1:]
            #print('o2',o2)
        elif str(i.text).startswith('C.'):
            o3 = str(i.text)
            o3 = o3[o3.index(' ') + 1:]
            #print('o3',o3)
        elif str(i.text).startswith('D.'):
            o4 = str(i.text)
            o4 = o4[o4.index(' ') + 1:]
            #print('o4',o4)
        elif str(i.text).startswith('Expla'):
            exp = str(i.text)
            fl = 0
            exp = exp[exp.index(' ') + 1:]
            #print('exp',exp)
            updateDB([q, o1, o2, o3, o4, ans, exp])
            q = o1 = o2 = o3 = o4 = ans = exp = ''
import time
import pickle

from selenium import webdriver
from selenium.webdriver.chrome import service
from bs4 import BeautifulSoup as bss

# Drive Opera through the chromium-based operadriver.
webdriver_service = service.Service(
    '/home/array/Downloads/operadriver_linux64/operadriver')
webdriver_service.start()
driver = webdriver.Remote(webdriver_service.service_url,
                          webdriver.DesiredCapabilities.OPERA)

myUserId = 'hack_it_like_you_know'
driver.get('https://instagram.com/')

# Reuse a previously saved login session instead of logging in again.
cookie_file = 'cookie.data'
cookies = pickle.load(open(cookie_file, "rb"))
for i in cookies:
    driver.add_cookie(i)

driver.get('https://instagram.com/' + myUserId + '/followers')
driver.find_element_by_xpath(
    """//*[@id="react-root"]/section/main/div/header/section/ul/li[2]/a"""
).click()
followersPageSource = driver.page_source
soup = bss(followersPageSource, features='html.parser')
time.sleep(10)