def stdreq(q=''):
    # fetch a path from the local server and return the decoded body
    c = u('http://127.0.0.1:2938/' + q)
    t = c.read().decode()
    c.close()
    del c
    mod('gc').collect()  # force a garbage-collection pass after dropping the connection
    return t
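# Assumed context for stdreq above (an inference from the call sites, not given
# by the source): `u` as urlopen and `mod` as __import__, so that
# mod('gc').collect() resolves to gc.collect().
from urllib.request import urlopen as u
mod = __import__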
def scan(q):
    # recursively crawl pages, printing any URL whose page mentions 'устав' ("charter")
    global h
    if q in h:
        return
    h += [q]
    f = q
    try:
        q = u(q).read().decode()
    except E:
        return
    if 'устав' in q.lower():
        print(f)
    # isolate tags: mark '<' openings with \x02 and use \x01 as a separator
    q = q.replace('<', '\x01\x02').replace('>', '\x01')
    q = q.split('\x01')
    q = [w for w in q if w and w[0] == '\x02']
    # pull every attribute value out of each tag
    q = [w.split() for w in q]
    q = [[e.split('=')[1] for e in w if '=' in e] for w in q]
    q = ['\x01'.join(w) for w in q]
    q = '\x01'.join(q)
    q = q.split('\x01')
    # strip surrounding quotes
    q = [w[1:] if w and w[0] in '"\'' else w for w in q]
    q = [w[:-1] if w and w[-1] in '"\'' else w for w in q]
    q = [[w] for w in q if w]
    # resolve relative links against the site root or the current page
    for w in q:
        if w[0][0] == '/':
            w[0] = 'http://kpml.ru' + w[0]
        elif w[0][:7] != 'http://' and w[0][:8] != 'https://':
            if f[-1] == '/':
                w[0] = f + w[0]
            else:
                w[0] = f + '/' + w[0]
    for w in q:
        scan(w[0])
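# For comparison, the same attribute-value harvesting done with the standard
# library's html.parser instead of the \x01/\x02 marker trick. This is a sketch
# of the technique, not the author's code; like scan(), it collects every
# attribute value and resolves relative URLs against a base.
from html.parser import HTMLParser
from urllib.parse import urljoin

class LinkHarvester(HTMLParser):
    def __init__(self, base):
        super().__init__()
        self.base = base
        self.links = []

    def handle_starttag(self, tag, attrs):
        for _, value in attrs:
            if value:
                self.links.append(urljoin(self.base, value))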
def getPlural(key):
    # f'https://en.wiktionary.org/wiki/{123}'
    v = u(f'https://en.wikipedia.org/w/index.php?title={q(key)}&action=edit')
    h = v.read().decode('utf-8')
    soup = b(h, 'html.parser')
    infoBoxTry = soup.select('#wpTextbox1')
    if len(infoBoxTry) > 0:
        editArea = infoBoxTry[0].text
        infoBox = fa('(?<=plural = ).*', editArea)
        if len(infoBox) > 0:
            formatted = s(
                r'(\{.*\}|\(.*\)|\t| | |\'\'.*\'\'|.*\: |<br>|<!--.*-->|/.*|\{\{.*\|)',
                '', infoBox[0]
            )  # TODO: {{plainlist}} not supported, needs a workaround
            if len(formatted) > 1:
                print(f'{key}: {formatted.lower()}')
                return formatted.lower()
            else:
                return None
        else:
            return None
    else:
        return None
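# Assumed aliases for the single-letter helpers in getPlural (inferred from the
# call sites, not stated in the source): u ~ urlopen, q ~ quote,
# b ~ BeautifulSoup, fa ~ re.findall, s ~ re.sub.
from urllib.request import urlopen as u
from urllib.parse import quote as q
from re import findall as fa, sub as s
from bs4 import BeautifulSoup as b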
from urllib.request import urlopen
response = urlopen('http://www.google.com')
print(response.status)

from urllib.request import urlopen as u
response = u('http://www.google.com')
print(response.status)

from urllib.request import Request, urlopen
req = Request('http://www.google.com')
response = urlopen(req)
print(response.status)

from urllib.request import Request as r, urlopen as u
req = r('http://www.google.com')
response = u(req)
print(response.status)
from math import sqrt, pi  # import added: pi and sqrt are used before any import below
print(pi)
print(sqrt(4.0))
print(sqrt(2.0))

from math import sqrt as s, pi as p
print(p)
print(s(4.0))
print(s(2.0))

import urllib.request as r
response = r.urlopen('http://www.google.co.kr')
print(response.status)

from urllib.request import Request, urlopen
req = Request('http://www.google.co.kr')
response = urlopen(req)
print(response.status)

from urllib.request import Request as r, urlopen as u
req = r('http://www.naver.com')
response = u(req)
print(response.status)

from math import pi
r = float(input())
print(pi * r**2)  # area of a circle with the given radius
from os import system as s
from urllib.request import urlopen as u
from time import *

d = 0  # moved out of the loop: d keeps an exponential moving average across iterations
while 1:
    t = time()
    while 1:
        try:
            u('https://vk.com', timeout=3600).read()
            break
        except:
            pass
    t = time() - t  # how long the fetch took
    n = 16 / (max(d - t, 0) + 1)  # sleep less when the fetch was slower than average
    sleep(n)
    s('termux-notification -c ' + str(t))  # show the latest fetch time as a Termux notification
    d = d * .9 + t * .1  # update the moving average
from urllib.request import urlopen as u

q = u('http://kpml.ru/pages/raspisanie/izmeneniya-v-raspisanii').read().decode()
print(q)
'''
q = open('site').read()
q = q.split('\n')
q = [[len(w), w] for w in q]
q = max(q)[1]
q = q.replace(' ', '')
q = q.replace('>', '>\n')
q = q.replace('<', '\n<')
q = q.replace('\n\n', '\n')
q = q.split('\n')
q = [[w, '', ''] for w in q if w and w[0] != '<']
i = ''
for w in q:
    if w[0][:9] == 'Изменения':
        i = w[0].split('-')[1]
        i = i.lower()
        i = i.split()
        i[1] = 'января февраля марта апреля мая июня июля августа сентября октября ноября декабря'.split().index(i[1])
        i[1] += 1
        i = i[:-1]
    else:
        w[1] = i
for w in q:
    w[0] = w[0].strip()
c = []
for w in 'АБВ':
    for e in range(1, 12):
        c += [str(e) + w]
'''
# Naming a package module with import as
import urllib.request as r    # import the request module from the urllib package under the name r
response = r.urlopen('http://www.google.co.kr')    # use the urlopen function through r
response.status

# Importing only part of a package's module with from import
from urllib.request import Request, urlopen    # import the urlopen function and the Request class
req = Request('http://www.google.co.kr')    # create req using the Request class
response = urlopen(req)    # use the urlopen function
response.status

# Importing part of a package's module and renaming it with from import as
from urllib.request import Request as r, urlopen as u
req = r('http://www.google.co.kr')    # use the Request class through r
response = u(req)    # use the urlopen function through u
response.status

# Installing packages from the Python Package Index
# Installing pip (bundled on Windows)
# On Windows, open Command Prompt (press Windows key+R, then type cmd); on Linux and
# macOS, open a console (terminal), then run pip install requests (pip commands must
# not be typed into the Python shell >>>; always enter them in Command Prompt, a
# console, or a terminal).
# For reference, requests plays a similar role to urllib.request from the Python
# standard library, but it has more features and is more convenient.

# Importing a package with import
import requests    # import the requests package installed with pip
r = requests.get('http://www.google.co.kr')    # use the requests.get function
from urllib.request import urlopen as u
from bs4 import BeautifulSoup as bs

my_url = "https://www.crummy.com/software/BeautifulSoup/bs4/doc/"

# open the connection and get the page with the above URL
uClient = u(my_url)
page_html = uClient.read()  # HTML content
uClient.close()

# parse the HTML content
pageContent = bs(page_html, "html.parser")
print(pageContent.h1)
print(pageContent.p)
count = pageContent.findAll("div")
print(f"Count is: {len(count)}")  # f-string added so the count is actually interpolated
print(count[0])
from urllib.request import urlopen as u

q = u('http://www.mobilenin.com/mobilepythonbook/examples.html').read().decode()
q = q.split('<tr>')[1:]
q = [w.split('|')[1] for w in q]
q = [w.split('<a href="')[1] for w in q]
q = [w.split('"')[0] for w in q]
q = ['http://www.mobilenin.com/mobilepythonbook/' + w for w in q]

from os import system as sh
print([w for w in [sh('wget ' + w) for w in q] if w])  # print any non-zero wget exit codes
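# A portable alternative to shelling out to wget, using the standard library's
# urllib.request.urlretrieve on the same URL list q (a sketch, assuming the last
# path segment of each URL is a unique filename).
from urllib.request import urlretrieve

for link in q:
    urlretrieve(link, link.rsplit('/', 1)[-1])  # save under the URL's last path segment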
import bs4  # first we import a library required for web scraping (bs4 - Beautiful Soup)
from urllib.request import urlopen as u  # this fetches a web URL into Python; I have aliased it as 'u'
from bs4 import BeautifulSoup as soup  # here the BeautifulSoup class is imported from the bs4 library

url = 'https://www.indeed.ae/jobs?q=oil+and+gas+&l=abudhabi'  # store the webpage URL in a variable
url2 = 'https://www.indeed.ae/jobs?q=oil+and+gas+&l=abudhabi&start=10'  # same process
uclient = u(url)  # here we invoke the urlopen function to fetch the page for Python
uclient2 = u(url2)
page = uclient.read()  # this lets the HTML content be read and turned into a BeautifulSoup object
page2 = uclient2.read()
uclient.close()  # close() cuts the connection once the HTML is read, so even if a network error occurs later the script keeps running
uclient2.close()
scrap = soup(page, "html.parser")  # BeautifulSoup parses the HTML content into a navigable tree of tags
scrap2 = soup(page2, "html.parser")
# scrap.h1
# scrap.body
fin = scrap.findAll("div", {"class": "title"})  # I only want the job titles, so findAll fetches every div with class "title"
fin2 = scrap2.findAll("div", {"class": "title"})
fin
fin2
len(fin)  # just to know how many job titles are in the file; use print to see the output
fin[0]
a = fin[0]  # Python is zero-indexed, so we start from index 0
from urllib.request import urlopen as u
from bs4 import BeautifulSoup as b

# take the URL as input, fetch the page, read it, and make a BeautifulSoup object
s = b(u(input('Enter the url: ')).read(), 'html.parser')

# open a file with a dynamic name
f = open(input('Enter the name of the file: ') + ".html", "w", encoding="utf-8")

# write into the file after converting to a string
f.write(str(s))
f.close()
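# If indented output is preferred, bs4's prettify() can replace str(); a minimal
# sketch (example.com stands in for any URL):
from urllib.request import urlopen
from bs4 import BeautifulSoup

doc = BeautifulSoup(urlopen('https://example.com').read(), 'html.parser')
with open('page.html', 'w', encoding='utf-8') as out:
    out.write(doc.prettify())  # prettify() re-indents the markup; str(doc) keeps the original layout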
def getPopularity(key):
    # reference URL for per-page view statistics (left as a bare expression in the source)
    f'https://tools.wmflabs.org/pageviews/?project=en.wikipedia.org&platform=all-access&agent=user&range=latest-20&pages={key}'

doneCurrencyNames = {}
currencies = "https://en.wikipedia.org/wiki/List_of_circulating_currencies"
v = u(currencies)
h = v.read().decode('utf-8')
soup = b(h, 'html.parser')
shift = 0
se = soup.select("#mw-content-text > div > table > tbody > tr")
for i in range(len(se)):
    print(f'{i+1}/{len(se)}')
    tdList = se[i].findAll('td')
    if len(tdList) >= 5:
        # adjust the column index for rows that are missing their leading cell
        if len(tdList) == 5:
            shift = 1
        else:
            shift = 0
        currName = s(r'\[.*\]', '', tdList[1 - shift].text).replace('\n', '')
        if currName == '(none)':
            continue