def get_user_lists(self):
    """ Return all the imdb lists for this user. """
    if not self.imdb_user:
        return []

    ulist = []
    url = self.users_url.format(userid=self.imdb_user)
    result = client.request(url)
    soup = bss(result, "html.parser")
    items = soup.findAll("div", {"class": "user-list"})
    for item in items:
        list_id = item['id']
        list_name = item.find("a", {"class": "list-name"}).get_text()
        # The list URL is the same regardless of title_type.
        url = "q=imdbUserList&listId={}".format(list_id)
        ulist.append({
            'name': list_name,
            'id': list_id,
            'url': url,
            'tvdb': '0'
        })
    return ulist
def acestream_channels():
    """ Return a list of tvshows from acestream channels url """
    items = []
    r = requests.get(CHANNELS_URL)
    soup = bss(r.text, 'html.parser')
    for i in soup.find('table').findAll('tr'):
        try:
            stream_name = i.find('td').renderContents()
            if not stream_name:
                continue
        except Exception:
            continue
        try:
            ace_link = i.find('a').get('href')
        except Exception:
            continue
        # The inline style of the link encodes the stream status colour.
        try:
            style = i.find('a').get('style')
        except Exception:
            style = None
        if not style:
            color = 'grey'
        elif 'red' in style:
            color = 'red'
        elif 'green' in style:
            color = 'green'
        else:
            color = 'blue'
        items.append({'url': ace_link, 'color': color, 'desc': stream_name})
    return items
def get3015():
    # Scrape the usage counter from the HP printer's embedded web server
    # (self-signed certificate, hence verify=False).
    url = 'https://10.65.31.18/hp/device/this.LCDispatcher?nav=hp.Usage'
    r = requests.get(url, verify=False)
    content = r.content
    soup = bss(content, 'lxml')
    taa = soup.findAll('table', id='tbl-1847')[-1]
    tab = taa.findAll('div', 'hpPageText')[-1].text
    print(tab)
def run(self):
    url = 'http://' + self.ip + '/hp/device/this.LCDispatcher?nav=hp.Usage'
    r = requests.get(url, verify=False)
    content = r.content
    soup = bss(content, 'html5lib')
    taa = soup.findAll('table', 'hpTable')[-1]
    tab = taa.findAll('span', 'hpPageText')[-1].text
    print(tab)
def get_imdb_url_contents(self, url, *args):
    """ Retrieve the list of shows for the given url """
    if not url:
        return []

    # Update the page limit to our setting
    path, params = self.url_decode(url)
    params.update({'count': self.count})
    url = "{}/{}?{}".format(self.base_url, path, self.params_encode(params))
    log_utils.log("Updated URL: {}".format(url))

    results_list = []
    result = client.request(url)
    soup = bss(result, "html.parser")
    next_url = self.get_next_link(path, soup)
    for li in soup.findAll("div", {"class": "lister-item"}):
        title = li.find("h3", {"class": "lister-item-header"}).find('a').getText()
        year_raw = li.find("span", {"class": "lister-item-year"}).getText()
        try:
            year = int(re.search(r'(\d+)', year_raw).group(0))
        except Exception:
            year = 'TBD'
        try:
            rating = li.find("div", {"class": "ratings-imdb-rating"}).find("strong").get_text()
        except Exception:
            rating = '?'
        plot = li.find("p", {"class": ""}).getText().strip()
        imdb = li.find("div", {"class": "lister-item-image"}).find("img")['data-tconst']
        poster = li.find("div", {"class": "lister-item-image"}).find("img")['loadlate']
        results_list.append({
            'title': title,
            'originaltitle': title,
            'year': year,
            'rating': rating,
            'plot': plot,
            'imdb': imdb,
            'poster': poster,
            'tvdb': '0',
            'next': next_url,
        })
    return results_list
def acesearch(term):
    """ Search URL for acestreams and return list of dictionaries containing
    the name and acestream URL """
    r = requests.post(SEARCH_URL, data={'cn': term})
    soup = bss(r.text, "html.parser")
    items = []
    for i in soup.findAll('a', {'class': 'list-group-item'}):
        items.append({'url': i['href'], 'desc': i.contents[0]})
    return items
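# Minimal usage sketch for the acestream helpers above (acestream_channels and
# acesearch). It assumes CHANNELS_URL and SEARCH_URL are defined in the module;
# the search term 'sport' is only an example.
if __name__ == '__main__':
    for channel in acestream_channels():
        print(channel['color'], channel['desc'], channel['url'])
    for hit in acesearch('sport'):
        print(hit['desc'], hit['url'])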
def run(self):
    global prtnum
    try:
        url = 'https://' + self.ip + '/hp/device/this.LCDispatcher?nav=hp.Usage'
        r = requests.get(url, verify=False)
        content = r.content
        soup = bss(content, 'html5lib')
        taa = soup.findAll('table', id='tbl-1847')[-1]
        tab = taa.findAll('div', 'hpPageText')[-1].text
        with mylock:
            prtnum[self.ip] = tab
    except Exception:
        # Record a zero count for printers that cannot be reached or parsed.
        with mylock:
            prtnum[self.ip] = 0
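# Illustrative driver for the threaded page-count collector above. The Thread
# subclass name (PageCountThread) and the printer IPs are assumptions; prtnum
# and mylock are the module-level globals that run() expects.
import threading

mylock = threading.Lock()
prtnum = {}

def collect_page_counts(ips):
    # One thread per printer IP; run() stores each result in the shared dict.
    threads = [PageCountThread(ip) for ip in ips]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    return prtnum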
def get_imdb_url_contents(self, url, *args):
    """ Retrieve the list of shows for the given url """
    if not url:
        return []

    results_list = []
    result = client.request(url)
    soup = bss(result, "html.parser")
    for li in soup.findAll("div", {"class": "lister-item"}):
        title = li.find("h3", {"class": "lister-item-header"}).find('a').getText()
        year_raw = li.find("span", {"class": "lister-item-year"}).getText()
        try:
            year = int(re.search(r'(\d+)', year_raw).group(0))
        except Exception:
            year = 'TBD'
        try:
            rating = li.find("div", {"class": "ratings-imdb-rating"}).find("strong").get_text()
        except Exception:
            rating = '?'
        plot = li.find("p", {"class": ""}).getText().strip()
        imdb = li.find("div", {"class": "lister-item-image"}).find("img")['data-tconst']
        poster = li.find("div", {"class": "lister-item-image"}).find("img")['loadlate']
        results_list.append({
            'title': title,
            'originaltitle': title,
            'year': year,
            'rating': rating,
            'plot': plot,
            'imdb': imdb,
            'poster': poster,
            'tvdb': '0',
        })
    return results_list
def kavery(self):
    base_url = 'http://kavery.org.in/placement11-12.aspx'
    head = {'User-Agent': 'Mozilla/5.0 (Linux; <Android Version>'}
    req = requests.get(url=base_url, headers=head)
    soup = bss(req.content, 'html.parser')
    lokt = soup.find('table')
    t_row = lokt.find_all('tr')
    lst = []
    for tm in t_row:
        td = tm.find_all('td')
        row = [tp.get_text() for tp in td if tp.get_text()]
        lst.append(row)
    # Print the header followed by the first 28 rows of the placement table.
    l = ['s.No', 'Name', 'Placed']
    print(l)
    st = 0
    while st <= 27:
        print(lst[st])
        st += 1
def updateQuestions(urllink):
    s = requests.get(urllink)
    soup = bss(s.text, features='html.parser')
    l = soup.findAll('p')
    for i in l:
        try:
            # Question paragraphs look like "1. text (a) ... (b) ... (c) ... (d) ...";
            # the answer follows in a separate paragraph starting with "Answer: ".
            if re.search(r'\d\. ', i.text):
                q = str(i.text)
                q = q[q.index(' ') + 1:]
                q, abcd = q.split('(a) ')
                o1, bcd = abcd.split('(b) ')
                o2, cd = bcd.split('(c) ')
                o3, o4 = cd.split('(d) ')
            if str(i.text).startswith('Answer: '):
                ans = str(i.text)
                ans = ans[ans.index(' ') + 1:]
                updateDB([q, o1, o2, o3, o4, ans, ''])
        except Exception:
            continue
import requests
import re
from mechanize import Browser
from bs4 import BeautifulSoup as bss
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

# WEBMAIL_ID, WEBMAIL_PASS and the selenium `driver` instance are assumed to be
# defined elsewhere in the project.
url = "webmail.daiict.ac.in"
master = 'https://webmail.daiict.ac.in/'
r = requests.get(master)

# Log in to the Zimbra webmail with mechanize and collect the message ids.
br = Browser()
br.set_handle_robots(False)
br.open("https://webmail.daiict.ac.in/zimbra/")
br.select_form(name='loginForm')
br.form['username'] = WEBMAIL_ID
br.form['password'] = WEBMAIL_PASS
br.submit()
response = br.response().read()

pool = bss(response, 'html.parser')
messages = pool.find_all('tbody', {'id': 'mess_list_tbody'})[0]
messages = messages.findAll('a', href=True)
queue = []
for message in messages:
    ID = message['id']
    queue.append(ID)

# Click through each message in the browser session and open the Forward action.
delay = 3
wait = WebDriverWait(driver, delay)
for each in queue:
    message = wait.until(EC.element_to_be_clickable((By.ID, each)))
    message.click()
    forward = wait.until(EC.element_to_be_clickable((By.ID, 'OPFORW')))
def html2unicode(text):
    """Converts HTML entities to unicode. For example '&amp;' becomes '&'."""
    # BeautifulSoup 4 decodes entities while parsing, so get_text() already
    # returns the unescaped text (the old convertEntities argument is BS3-only).
    return bss(text, 'html.parser').get_text()
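# For plain text that only needs entity decoding, the Python 3 standard library
# gives the same result without BeautifulSoup; this is only an illustrative
# alternative, not part of the original helper.
import html

def html2unicode_stdlib(text):
    """Stdlib-only sketch: decode HTML entities in a plain-text string."""
    return html.unescape(text)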
def updateQuestions(urllink):
    s = requests.get(urllink)
    soup = bss(s.text, features='html.parser')
    l = soup.findAll('p')
    i = 0
    while i < len(l):
        try:
            # A question paragraph starts with "<number>. <text>"; options (A)-(D),
            # the answer and the explanation follow in later <p> elements.
            if re.search(r'\d\. \w+', l[i].text):
                q = str(l[i].text)
                q = q[q.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('(A) '):
                    i += 1
                o1 = str(l[i].text)
                o1 = o1[o1.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('(B) '):
                    i += 1
                o2 = str(l[i].text)
                o2 = o2[o2.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('(C) '):
                    i += 1
                o3 = str(l[i].text)
                o3 = o3[o3.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('(D) '):
                    i += 1
                o4 = str(l[i].text)
                o4 = o4[o4.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('Answer: '):
                    i += 1
                ans = str(l[i].text)
                ans = ans[ans.index(' ') + 1:]
                i += 1
                while not str(l[i].text).startswith('Expl'):
                    i += 1
                exp = str(l[i].text)
                exp = exp[exp.index(' ') + 1:]
                i += 1
                updateDB([q, o1, o2, o3, o4, ans, exp])
            else:
                i += 1
        except Exception:
            i += 1
import requests
from tabulate import tabulate
from bs4 import BeautifulSoup as bss

pl = []
myurl = "https://www.flipkart.com/mobiles/smartphones~type/pr?sid=tyy%2C4io&page="
for i in range(1, 5):
    s = requests.get(myurl + str(i))
    print('[+] done fetching websites')
    soup = bss(s.text, features='html.parser')
    for a in soup.findAll('a', href=True, attrs={'class': "_31qSD5"}):
        name = a.find('div', attrs={'class': '_3wU53n'}).text
        price = a.find('div', attrs={'class': '_1vC4OE _2rQ-NK'}).text
        specsList = a.find('ul', attrs={'class': 'vFw0gD'})
        specs = specsList.find('li', attrs={'class': "tVe95H"}).text
        offerS = a.find('div', attrs={'class': 'VGWI6T'})
        if offerS is None:
            offer = ' '
        else:
            offer = offerS.span.text
        # Pad 6-character offer strings with a leading zero for uniform width.
        if len(offer) == 6:
            offer = '0' + offer
        pl.append([name, price, specs, offer])

print(tabulate(sorted(pl, key=lambda x: x[0], reverse=True),
               ["Device", "Price", "Specs", "Offer"], "fancy_grid"))
from urllib.request import urlopen
from bs4 import BeautifulSoup as bss

# By default, all HTTPS connections verify the server certificate and hostname.
# Affected libraries: urllib, urllib2, http, httplib.
# Disable verification globally so the page can still be fetched.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# Boannews Base URL
base_url = 'https://www.dailysecu.com'
url = base_url + '/?mod=news&act=articleList&view_type=S&sc_code=1435901200'

# url open
f = urlopen(url)
# page read
b = f.read()

soup = bss(b, 'html.parser')
divs = soup.find_all('div', {'class': 'list-block'})
file_data = []


def getDailyData():
    num = 0
    for i in divs:
        f = {}
        title = i.find('div', {'class': 'list-titles'})
        title = title.string
        url = i.find('a')['href']
def updateQuestions(urllink):
    s = requests.get(urllink)
    soup = bss(s.text, features='html.parser')
    l = soup.findAll('p')
    fl = 0
    # exp is initialised so questions without an explanation can still be saved.
    exp = ''
    for i in l:
        if re.search(r'\d\. ', i.text):
            # A new question paragraph: flush the previous one if it is pending.
            if fl == 1:
                updateDB([q, o1, o2, o3, o4, ans, ''])
                q = o1 = o2 = o3 = o4 = ans = ''
                fl = 0
            q = str(i.text)
            q = q[q.index(' ') + 1:]
            if 'A.' in q and 'Ans. ' in q:
                # Question, options, answer and explanation are all in one paragraph.
                q, abcd = q.split('A. ')
                o1, bcd = abcd.split('B. ')
                o2, cd = bcd.split('C. ')
                o3, d = cd.split('D. ')
                o4, ans = d.split('Ans. ')
                if 'Expla' in ans:
                    ans, exp = ans.split('Explanation: ')
                updateDB([q, o1, o2, o3, o4, ans, exp])
                q = o1 = o2 = o3 = o4 = ans = exp = ''
                fl = 0
            #print('q',q)
        elif str(i.text).startswith('Ans.'):
            ans = str(i.text)
            ans = ans[ans.index(' ') + 1:]
            fl = 1
            #print('ans',ans)
        elif str(i.text).startswith('A.'):
            o1 = str(i.text)
            o1 = o1[o1.index(' ') + 1:]
            #print('o1',o1)
        elif str(i.text).startswith('B.'):
            o2 = str(i.text)
            o2 = o2[o2.index(' ') + 1:]
            #print('o2',o2)
        elif str(i.text).startswith('C.'):
            o3 = str(i.text)
            o3 = o3[o3.index(' ') + 1:]
            #print('o3',o3)
        elif str(i.text).startswith('D.'):
            o4 = str(i.text)
            o4 = o4[o4.index(' ') + 1:]
            #print('o4',o4)
        elif str(i.text).startswith('Expla'):
            exp = str(i.text)
            fl = 0
            exp = exp[exp.index(' ') + 1:]
            #print('exp',exp)
            updateDB([q, o1, o2, o3, o4, ans, exp])
            q = o1 = o2 = o3 = o4 = ans = exp = ''
import time
import pickle

from selenium import webdriver
from selenium.webdriver.chrome import service
from bs4 import BeautifulSoup as bss

# Drive Opera through the chromium-based operadriver.
webdriver_service = service.Service(
    '/home/array/Downloads/operadriver_linux64/operadriver')
webdriver_service.start()
driver = webdriver.Remote(webdriver_service.service_url,
                          webdriver.DesiredCapabilities.OPERA)

myUserId = 'hack_it_like_you_know'
driver.get('https://instagram.com/')

# Reuse a previously saved login session instead of logging in again.
cookie_file = 'cookie.data'
cookies = pickle.load(open(cookie_file, "rb"))
for i in cookies:
    driver.add_cookie(i)

driver.get('https://instagram.com/' + myUserId + '/followers')
driver.find_element_by_xpath(
    """//*[@id="react-root"]/section/main/div/header/section/ul/li[2]/a"""
).click()
followersPageSource = driver.page_source
soup = bss(followersPageSource, features='html.parser')
time.sleep(10)