def leerBaloncestoLigas():
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}
    req = Request(
        url='https://www.centroapuesta.com/apuestas/futbol/espana/laliga/',
        headers=headers)
    html = urlopen(req).read()
    soup2 = BeautifulSoup(html)
    content = soup2.find("div", {"id": "content"})
    menu_apuestas_deportes = content.find(
        "div", {"id": "menu-apuestas-deportes"})
    sports_menu = menu_apuestas_deportes.find("nav", {"class": "sports-menu"})
    ul = sports_menu.find("ul")
    '''SPORTS'''
    li = ul.find("li", {"class": "baloncesto"})
    ul = li.find("ul")
    '''COUNTRIES'''
    lis = ul.findAll("li")
    for li in lis:
        ul = li.find("ul")
        if ul is not None:
            links = ul.findAll('a')
            for link in links:
                cuotasBaloncesto(link["href"])
def downloadFile(sourceurl, targetfname):
    mem_file = ''
    good_read = False
    xbrlfile = None
    if os.path.isfile(targetfname):
        print('Local copy already exists')
        return True
    else:
        print('Downloading:', sourceurl)
        try:
            xbrlfile = urlopen(sourceurl)
            try:
                mem_file = xbrlfile.read()
                good_read = True
            finally:
                xbrlfile.close()
        except HTTPError as e:
            print('HTTP Error:', e.code)
        except URLError as e:
            print('URL Error:', e.reason)
        except TimeoutError as e:
            print('Timeout Error:', e)
        except socket.timeout:
            print('Socket Timeout Error')
        if good_read:
            output = open(targetfname, 'wb')
            output.write(mem_file)
            output.close()
        return good_read
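# A minimal usage sketch for downloadFile (not part of the original snippet); the
# feed URL and local filename below are illustrative assumptions.
if downloadFile('https://www.sec.gov/Archives/edgar/monthly/xbrlrss-2020-01.xml',
                'xbrlrss-2020-01.xml'):
    print('Saved local copy of the filing feed')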
def lookup_cik(ticker, name=None):
    # Given a ticker symbol, retrieves the CIK
    good_read = False
    ticker = ticker.strip().upper()
    url = 'http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&count=10&output=xml'.format(cik=ticker)
    try:
        xmlFile = urlopen(url)
        try:
            xmlData = xmlFile.read()
            good_read = True
        finally:
            xmlFile.close()
    except HTTPError as e:
        print('HTTP Error', e.code)
    except URLError as e:
        print('Url Error', e.reason)
    except TimeoutError as e:
        print('Timeout Error', e)
    except socket.timeout:
        print('Socket Timeout Error')
    if not good_read:
        print('Unable to lookup CIK for ticker', ticker)
        return
    try:
        root = ET.fromstring(xmlData)
    except ET.ParseError as perr:
        print('XML Parser error:', perr)
        return
    try:
        cikElement = list(root.iter('CIK'))[0]
        return int(cikElement.text)
    except IndexError:
        # no CIK element found in the response
        pass
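# Hedged usage sketch for lookup_cik; 'AAPL' is an example ticker, not from the original code.
cik = lookup_cik('AAPL')
if cik is not None:
    print('CIK for AAPL:', cik)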
def getCountry(ipAddress):
    try:
        response = urlopen("http://freegeoip.net/json/"
                           + ipAddress).read().decode('utf-8')
    except HTTPError:
        return None
    responseJson = json.loads(response)
    return responseJson.get("country_code")
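# Illustrative call to getCountry; the IP address is an assumed example value.
print(getCountry('8.8.8.8'))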
def main(geo):
    # find the FTP address from https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GEO
    response = urlopen(
        "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={}".format(geo))
    pattern = re.compile("<a href=\"(.*?)\">\(ftp\)</a>")
    ftp_address = re.search(pattern, response.read().decode('utf-8')).group(1)
    # use wget from the shell to download the SRA data (recursive, one level deep)
    os.system('wget -nd -r -l 1 -A *.sra ' + ftp_address)
def check_network():
    while True:
        try:
            result = urlopen('http://www.google.com').read()
            print("Network is Ready!")
            break
        except Exception:
            print("Network is not ready, sleeping 5s...")
            time.sleep(5)
    return True
def cnn():
    try:
        jsonObj = urlopen("https://newsapi.org/v2/top-headlines?sources=cnn&apiKey=######")
        data = json.load(jsonObj)
        print(" ============CNN News==================")
        i = 1
        for item in data['articles']:
            print(str(i) + '.' + item['title'] + '\n' + item['description'] + '\n')
            i += 1
    except Exception as e:
        print(str(e))
def new_scientist():
    try:
        jsonObj = urlopen('https://newsapi.org/v2/top-headlines?sources=new-scientist&apiKey=######')
        data = json.load(jsonObj)
        i = 1
        print(' ==================New Scientist============')
        for item in data['articles']:
            print(str(i) + '.' + item['title'] + '\n' + item['description'] + '\n')
            i += 1
    except Exception as e:
        print(str(e))
def google_india_news():
    try:
        jsonObj = urlopen('https://newsapi.org/v2/top-headlines?sources=google-news-in&apiKey=######')
        data = json.load(jsonObj)
        i = 1
        print(' ==================Google India News============')
        for item in data['articles']:
            print(str(i) + '.' + item['title'] + '\n' + item['description'] + '\n')
            i += 1
    except Exception as e:
        print(str(e))
def read_url(url):
    url = url.replace(" ", "%20")
    req = Request(url)
    a = urlopen(req).read()
    soup = BeautifulSoup(a, 'html.parser')
    x = soup.find_all('a')
    for i in x:
        file_name = i.extract().get_text()
        url_new = url + file_name
        url_new = url_new.replace(" ", "%20")
        if file_name[-1] == '/' and file_name[0] != '.':
            read_url(url_new)
        print(url_new)
def times_of_india():
    try:
        jsonObj = urlopen('https://newsapi.org/v2/top-headlines?sources=techcrunch&apiKey=######')
        data = json.load(jsonObj)
        i = 1
        print(''' ==============TIMES OF INDIA============''' + '\n')
        for item in data['articles']:
            print(str(i) + '. ' + item['title'] + '\n')
            print(item['description'] + '\n')
            i += 1
    except Exception as e:
        print(str(e))
def html_doc(soup):
    html_doc = urlopen('http://').read()
    soup = BeautifulSoup(html_doc)
    # print(soup)
    # print(soup.title)
    # print(soup.title.string)
    for meta in soup.find_all('meta'):
        meta.get('content')
    for link in soup.find_all('a'):
        link.get('href')
    for link in soup.find_all('a'):
        link.contents[0]
    soup.find('div', 'content')
    soup.find('div', id='top_menu')
def Extract_subcript(self, video_id):
    url = "http://video.google.com/timedtext?lang=en&v=" + video_id
    html_page = urlopen(url)
    bs_obj = BeautifulSoup(html_page, "html.parser")
    lines = bs_obj.transcript.find_all("text")  # store every caption tag found by BeautifulSoup in lines
    captions = [""]  # list that collects the captions line by line
    caption = ""  # string that will hold the whole transcript
    for line in lines:
        one_line = html.unescape(line.get_text())
        # lowercase everything if needed
        # one_line = html.unescape(line.get_text()).lower()
        one_line = one_line.replace("\n", " ")
        one_line = one_line.split(" ")
        print(one_line)
        captions += one_line
    return captions
def weatherAPISearch(location):
    state = 'mn'  # Input for state
    city = 'state%20jello'  # Input for city; %20 used as a placeholder for space since the user can't enter a space
    # Get the dataset
    url = ('http://api.wunderground.com/api/' + 'key' + '/' + 'temp_f' + '/'
           + 'q' + '/' + state + '/' + city + '.json')
    response = urlopen(url)
    json_string = response.read().decode('utf-8')
    parsed_json = json.loads(json_string)
    # Get key data pairs
    location = parsed_json['location']['city']
    temp_f = parsed_json['current_observation']['temp_f']
    # Print temperature based on city location
    print("Current temperature in %s is: %s" % (location, temp_f))
def SECdownload(year, month):
    root = None
    feedFile = None
    feedData = None
    good_read = False
    itemIndex = 0
    edgarFilingsFeed = 'http://www.sec.gov/Archives/edgar/monthly/xbrlrss-' + \
        str(year) + '-' + str(month).zfill(2) + '.xml'
    print(edgarFilingsFeed)
    if not os.path.exists("sec/" + str(year)):
        os.makedirs("sec/" + str(year))
    if not os.path.exists("sec/" + str(year) + '/' + str(month).zfill(2)):
        os.makedirs("sec/" + str(year) + '/' + str(month).zfill(2))
    target_dir = "sec/" + str(year) + '/' + str(month).zfill(2) + '/'
    try:
        feedFile = urlopen(edgarFilingsFeed)
        try:
            feedData = feedFile.read()
            good_read = True
        finally:
            feedFile.close()
    except HTTPError as e:
        print("HTTP Error:", e.code)
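# Usage sketch for SECdownload; the year/month values are assumed example arguments.
SECdownload(2020, 1)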
 import sys
+try:
+    from urllib.request import urlopen
+except ImportError:
+    # py2
+    from urllib2 import urlopen
+
+
 # Environment variable for the socket url
 # (used by clients to locate the socket [http, zmq(unix, tcp)])
 CTX_SOCKET_URL = 'CTX_SOCKET_URL'
@@ -60,9 +65,8 @@ def zmq_client_req(socket_url, request, timeout):
 
 
 def http_client_req(socket_url, request, timeout):
-    response = urllib2.urlopen(socket_url,
-                               data=json.dumps(request),
-                               timeout=timeout)
+    response = urlopen(
+        socket_url, data=json.dumps(request).encode('utf-8'), timeout=timeout)
     if response.code != 200:
         raise RuntimeError('Request failed: {0}'.format(response))
     return json.loads(response.read())
@@ -137,3 +141,4 @@ def main(args=None):
 
 if __name__ == '__main__':
     main()
+
@@ -3371,7 +3371,7 @@
         else:
             color = colors_bgdark[id_]
         msg = '\033[3%sm%s\033[m' % (color, msg)
-    print "++ %s: %s" % (ids[id_], msg)
+    print("++ %s: %s" % (ids[id_], msg))
 
 
 def Readfile(file_path, remove_linebreaks=0, ignore_error=0):
@@ -3388,7 +3388,7 @@
     # URL
     elif PathMaster().is_url(file_path):
         try:
-            from urllib import urlopen
+            from urllib.request import urlopen
             f = urlopen(file_path)
             if f.getcode() == 404:  # URL not found
                 raise
@@ -3409,7 +3409,7 @@
             Error(_("Cannot read file:") + ' ' + file_path)
 
     if remove_linebreaks:
-        data = map(lambda x: re.sub('[\n\r]+$', '', x), data)
+        data = [re.sub('[\n\r]+$', '', x) for x in data]
 
     Message(_("File read (%d lines): %s") % (len(data), file_path), 2)
     return data
@@ -3427,13 +3427,13 @@
     cont = []
     if CONF['target'] in ('aat', 'rst', 'txt'):
         for line in contents:
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 25 10:38:47 2018

@author: Kritika Mishra
"""
from urllib.request import urlopen
from bs4 import BeautifulSoup
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

url = "http://news.bbc.co.uk/2/hi/health/2284783.stm"
html = urlopen(url).read()
soup = BeautifulSoup(html)

# kill all script and style elements
for script in soup(["script", "style"]):
    script.extract()    # rip it out

# get text
text = soup.get_text()

# break into lines and remove leading and trailing space on each
lines = (line.strip() for line in text.splitlines())
# break multi-headlines into a line each
chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
# drop blank lines
text = '\n'.join(chunk for chunk in chunks if chunk)

print(text)
from urllib.request import Request, urlopen

headers = {'Authorization': '08e15c0a-4119-4ed5-8852-bf512a3cd434'}
request = Request(
    'https://api.wizenoze.com/v1/customSearchEngines?reading=reading%20english&english=&grade%201=',
    headers=headers)

response_body = urlopen(request).read()
print(response_body)
def main():
    keywordPath = "features.txt"  # this should be the same keywords list/order used for training the ML Model
    count_vect = loadKeywords(keywordPath, False)
    keywords = count_vect.vocabulary_
    print("keywords:")
    print(keywords)
    sorted_keywords = sortingDict(keywords)
    kList = []
    for item in sorted_keywords:
        kList.append(item[0])  # assumed index: take the keyword from each (keyword, value) pair
    print(kList)

    modelBin = 'ocean.bin'
    listTopN = closeWordsList(modelBin, kList, 5)
    print(listTopN)

    x_train = []
    y_train = []
    with open('train.csv', 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        for row in reader:
            y_train.append(int(row[1]))

    noneContents = []
    x_n = None
    y_n = array(y_train)
    for i in range(1, 201):
        parsed = parser.from_file('/data/search_term_generation/200_files/' + str(i))
        content = parsed["content"]
        if content is not None:
            tempX = count_vect.transform(parsed["content"].split())
            x_train.append(tempX)
            print(str(i) + ":")
            print(tempX.toarray().sum(axis=0))  # axis=0 assumed: sum keyword counts over the document
            if x_n is None:
                x_n = array([tempX.toarray().sum(axis=0)])
            else:
                x_n = np.concatenate((x_n, [tempX.toarray().sum(axis=0)]), axis=0)
        else:
            noneContents.append(i)
    print(noneContents)

    np.savetxt('x_n.txt', x_n, fmt='%d')
    np.savetxt('y_n.txt', y_n, fmt='%d')
    x = np.loadtxt('x_n.txt', dtype=int)
    x_with_closeWords = addCloseCounts(listTopN, x)
    y = np.loadtxt('y_n.txt', dtype=int)
    mergeAllContents(y)

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=50)
    print(y_test)
    cv = ShuffleSplit(n_splits=5, test_size=0.1, random_state=0)  # random_state value assumed

    clf = GaussianNB()
    scoreNB = cross_val_score(clf, x, y, cv=cv)
    print(scoreNB)
    print("performance with close words added:")
    clf11 = GaussianNB()
    scoreNB2 = cross_val_score(clf11, x_with_closeWords, y, cv=cv)
    print(scoreNB2)
    clf1 = GaussianNB().fit(x_train, y_train)
    y_pred = clf1.predict(x_test)
    accNum = accuracy(y_pred, y_test)
    print("Model: Naive Bayes")
    acc = (y_test == y_pred).sum() / float(len(y_test))
    print("Test Accuracy:" + str(acc))
    print("Test Accuracy with 3 classes:" + str(accNum / 20))
    acc_train = (y_train == clf1.predict(x_train)).sum() / float(len(y_train))
    print("Train Accuracy:" + str(acc_train))

    from sklearn import linear_model
    clf22 = linear_model.SGDClassifier()
    scoreSVM = cross_val_score(clf22, x, y, cv=cv)
    print(scoreSVM)
    print("performance with close words added:")
    clf222 = linear_model.SGDClassifier()
    scoreSVM2 = cross_val_score(clf222, x_with_closeWords, y, cv=cv)
    print(scoreSVM2)
    clf2 = linear_model.SGDClassifier().fit(x_train, y_train)
    y_pred2 = clf2.predict(x_test)
    accNum2 = accNum = accuracy(y_pred2, y_test)
    print("Model: SVM")
    acc = (y_test == y_pred2).sum() / float(len(y_test))
    print("Test Accuracy:" + str(acc))
    print("Test Accuracy with 3 classes:" + str(accNum2 / 20))
    acc_train = (y_train == clf2.predict(x_train)).sum() / float(len(y_train))
    print("Train Accuracy:" + str(acc_train))
    print("******************")

    clf33 = MLPClassifier(max_iter=2000, learning_rate='adaptive')
    scoreNN = cross_val_score(clf33, x, y, cv=cv)
    print(scoreNN)
    print("performance with close words added:")
    clf333 = MLPClassifier(max_iter=2000, learning_rate='adaptive')
    scoreNN3 = cross_val_score(clf333, x_with_closeWords, y, cv=cv)
    print(scoreNN3)
    clf3 = MLPClassifier(max_iter=2000, learning_rate='adaptive').fit(x_train, y_train)
    y_pred3 = clf3.predict(x_test)
    accNum3 = accNum = accuracy(y_pred3, y_test)
    print("Model: Neural Network")
    acc = (y_test == y_pred3).sum() / float(len(y_test))
    print("Test Accuracy:" + str(acc))
    print("Test Accuracy with 3 classes:" + str(accNum3 / 20))
    acc_train = (y_train == clf3.predict(x_train)).sum() / float(len(y_train))
    print("Train Accuracy:" + str(acc_train))
    print("******************")

    from sklearn.ensemble import RandomForestClassifier
    clf44 = RandomForestClassifier(n_estimators=100)
    scoreRF = cross_val_score(clf44, x, y, cv=cv)
    print(scoreRF)
    clf444 = RandomForestClassifier(n_estimators=100)
    print("performance with close words added:")
    scoreRF4 = cross_val_score(clf444, x_with_closeWords, y, cv=cv)
    print(scoreRF4)
    clf4 = RandomForestClassifier(n_estimators=100).fit(x_train, y_train)
    y_pred4 = clf4.predict(x_test)
    accNum4 = accNum = accuracy(y_pred4, y_test)
    print("Model: Random Forest")
    acc = (y_test == y_pred4).sum() / float(len(y_test))
    print("Test Accuracy:" + str(acc))
    print("Test Accuracy with 3 classes:" + str(accNum4 / 20))
    acc_train = (y_train == clf4.predict(x_train)).sum() / float(len(y_train))
    print("Train Accuracy:" + str(acc_train))

    noneContents = array(noneContents)
    xOut = TemporaryFile()
    yOut = TemporaryFile()
    noneContentsOut = TemporaryFile()
    np.save(xOut, x_n)
    np.save(yOut, y_n)
    np.save(noneContentsOut, noneContents)

    with open('train.csv', 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        i = 0  # counter start value assumed
        for row in reader:
            i += 1
            if i > 30:
                url = row[0]  # assumed column: the URL is expected in the first CSV column
                print(url)
                print(download_file("http://" + url, i))
                requests.get("http://" + url, stream=True, headers={'User-agent': 'Mozilla/5.0'})
                label = row[1]
                reqLink = urlopen("http://" + url)
                content = reqLink.read()
                contentFeatures = count_vect.transform(content.split())
"class %%%(object):\n\tdef __init__(self, ***)": "class %%% has-a __init__ that takes self and *** parameters", "class %%%(object):n\tdef ***(self, @@@)": "class %%% has-a function named *** that takes self and @@@ parameters", "*** = %%%()": "Set *** to an instance of class %%%", "***.***(@@@)": "From *** get the *** function, and call it with parameters self, @@@", "***.*** = '***'": "From *** get the *** attribute and set it to '***"." } PHRASE_FIRST = False if len(sys.argv) == 2 and sys.argv[1] == "english": PHRASE_FIRST = True for word in urlopen(WORD_URL).readlines(): WORDS.append(word.strip()) def convert(snippet, phrase): class_names = [w.capitalize() for w input( random.sample(WORDS, snippet.count("%%%"))] other_name = random.sample(WORDS, snippet.count("***")) results = [] param_names = [] for i in range(0, snippet.count("@@@")): param_count = random.randint(1,3) param_names.append(', '.join(random.sample(WORDS,param_count) )]
# List of column names
COLUMN_NAMES = [
    'employer', 'download', 'location', 'union', 'local',
    'naics', 'num_workers', 'expiration_date'
]

STATES = [
    'AL', 'AK', 'AZ', 'AR', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'ID', 'IL', 'IN', 'IA', 'KS', 'KY', 'LA', 'ME', 'MD',
    'MA', 'MI', 'MN', 'MS', 'MO', 'MT', 'NE', 'NV', 'NH', 'NJ',
    'NM', 'NY', 'NC', 'ND', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VT', 'VA', 'WA', 'WV', 'WI', 'WY'
]

page = urlopen('http://www.dol.gov/olms/regs/compliance/cba/Cba_CaCn.htm')
rawtext = page.read()
html = fromstring(rawtext)
# print tostring(html)
tables = html.cssselect('table')
table = tables[2]
for tr in table.cssselect('tr')[1:]:
    cellvalues = [td.text_content() for td in tr.cssselect('td')]
    data = dict(zip(COLUMN_NAMES, cellvalues))
    print data
    data['num_workers'] = int(data['num_workers'])
    data['expiration_date'] = \
        datetime.datetime.strptime(data['expiration_date'], '%m-%d-%y').date()
# -*- coding: utf-8 -*-
from urllib.request import urlopen
from bs4 import BeautifulSoup

url = "https://www.rottentomatoes.com/"
html = urlopen(url)
source = html.read()  # read the page source as bytes
html.close()  # close the connection once urlopen is done

# pass the document to the BeautifulSoup constructor to get a parsed document
# object, conventionally named soup
soup = BeautifulSoup(source, "html5lib")

table = soup.find(id="Top-Box-Office")
movies = table.find_all(class_="middle_col")
for movie in movies:
    title = movie.get_text()
    print(title)
    link = movie.a.get('href')
    url = 'https://www.rottentomatoes.com' + link
    print(url)
# from urllib2 import urlopen
try:
    from urllib.request import urlopen
except ImportError:
    from urllib2 import urlopen
from io import BytesIO

import PyPDF2
from bs4 import BeautifulSoup

# Read the URL and save text in html1 and then in text.
url1 = "https://www.theguardian.com/politics/2018/sep/20/the-death-of-consensus-how-conflict-came-back-to-politics"
html1 = urlopen(url1).read().decode('utf8')
BeautifulSoup(html1).get_text()
soup = BeautifulSoup(html1, 'lxml')

# Read the PDF and save text in pdfString.
url2 = "http://eprints.lse.ac.uk/86880/7/Cox_Rise%20of%20populism%20published_2018.pdf"
pdf2 = BytesIO(urlopen(url2).read())  # fetch the PDF over HTTP; open() cannot read a URL directly
fileReader = PyPDF2.PdfFileReader(pdf2)
pdfString = ""
for x in range(11):
    pageObj = fileReader.getPage(x)
    pdfString = pdfString + pageObj.extractText()

# Print the text from url2 (left commented out here).
# print(pdfString)
from passwordmeter import test
from urllib2 import urlopen
from os.path import isfile
from random import choice, randint

if not isfile('words.txt'):
    print 'Downloading words.txt...'
    url = 'https://raw.githubusercontent.com/dwyl/english-words/master/words.txt'
    with open('words.txt', 'w') as f:
        f.write(urlopen(url).read())

words = open('words.txt', 'r').read().split("\n")
special_chars = ['!', '?']


def create_password(num_words=2, num_numbers=4, num_special=1):
    pass_str = ''
    for _ in xrange(num_words):
        pass_str += choice(words).lower().capitalize()
    for _ in xrange(num_numbers):
        pass_str += str(randint(0, 9))
    for _ in xrange(num_special):
        pass_str += choice(special_chars)
    return pass_str


def main():
    pass_str = create_password()
    strength, _ = test(pass_str)
#!/usr/bin/env python

# sample usage: checksites.py eriwen.com nixtutor.com yoursite.org

import pickle
import os
import logging
import time
import re
from optparse import OptionParser, OptionValueError
from smtplib import SMTP
from getpass import getuser
from socket import gethostname, setdefaulttimeout

try:
    from urllib2 import urlopen
except ImportError:
    from urllib.request import urlopen


def generate_email_alerter(to_addrs, from_addr=None, use_gmail=False,
                           username=None, password=None, hostname=None, port=25):

    if not from_addr:
        from_addr = getuser() + "@" + gethostname()

    if use_gmail:
        if username and password:
            server = SMTP('smtp.gmail.com', 587)
            server.starttls()
        else:
            raise OptionValueError('You must provide a username and password to use GMail')
    else:
        if hostname:
            server = SMTP(hostname, port)
        else:
            server = SMTP()
            # server.connect()
            server.starttls()

    if username and password:
        server.login(username, password)

    def email_alerter(message, subject='You have an alert'):
        server.sendmail(from_addr, to_addrs, 'To: %s\r\nFrom: %s\r\nSubject: %s\r\n\r\n%s' % (", ".join(to_addrs), from_addr, subject, message))

    return email_alerter, server.quit


def get_site_status(url):
    try:
        urlfile = urlopen(url)
        status_code = urlfile.code
        if status_code in (200, 302):
            return 'up', urlfile
    except:
        pass
    return 'down', None


def get_headers(url):
    '''Gets all headers from URL request and returns'''
    try:
        return urlopen(url).info().as_string()
    except:
        return 'Headers unavailable'


def compare_site_status(prev_results, alerter):
    '''Report changed status based on previous results'''

    def is_status_changed(url):
        startTime = time.time()
        status, urlfile = get_site_status(url)
        endTime = time.time()
        elapsedTime = endTime - startTime
        msg = "%s took %s" % (url, elapsedTime)
        logging.info(msg)

        if status != "up":
            elapsedTime = -1
        friendly_status = '%s is %s. Response time: %s' % (
            url, status, elapsedTime)
        print(friendly_status)
        if url in prev_results and prev_results[url]['status'] != status:
            logging.warning(status)
            # Email status messages
            alerter(str(get_headers(url)), friendly_status)

        # Create dictionary for url if one doesn't exist (first time url was
        # checked)
        if url not in prev_results:
            prev_results[url] = {}

        # Save results for later pickling and utility use
        prev_results[url]['status'] = status
        prev_results[url]['headers'] = None if urlfile is None else urlfile.info().headers
        prev_results[url]['rtime'] = elapsedTime

    return is_status_changed


def is_internet_reachable():
    '''Checks Google then Yahoo just in case one is down'''
    statusGoogle, urlfileGoogle = get_site_status('http://www.google.com')
    statusYahoo, urlfileYahoo = get_site_status('http://www.yahoo.com')
    if statusGoogle == 'down' and statusYahoo == 'down':
        return False
    return True


def load_old_results(file_path):
    '''Attempts to load most recent results'''
    pickledata = {}
    if os.path.isfile(file_path):
        picklefile = open(file_path, 'rb')
        pickledata = pickle.load(picklefile)
        picklefile.close()
    return pickledata


def store_results(file_path, data):
    '''Pickles results to compare on next run'''
    output = open(file_path, 'wb')
    pickle.dump(data, output)
    output.close()


def normalize_url(url):
    '''If a url doesn't have a http/https prefix, add http://'''
    if not re.match('^http[s]?://', url):
        url = 'http://' + url
    return url
def make_soup(url):
    html = urlopen(url).read()
    return BeautifulSoup(html)
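# Usage sketch for make_soup; the URL is an assumed example, not from the original snippet.
soup = make_soup('http://example.com/')
print(soup.title)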
with open('input.json', 'r') as f:
    inputjson = json.load(f)

# Remove an element from a JSON object.
# JSON-to-Python conversion:
#   Object -> dict
#   Array  -> list
#   String -> str
#   false  -> False
#   null   -> None

# Remove the element 'crime' from each state object
for state in inputjson['states']:
    del state['crime']

# export the python dict to a JSON file
with open('output.json', 'w') as w:
    json.dump(inputjson, w, indent=2)

#### get and parse json from a feed or site
import json
from urllib.request import urlopen

with urlopen('https://mysite') as response:
    source = response.read()

# convert the string to a python object
data = json.loads(source)
"*** = %%%()": "Set *** to an instance of class %%%.", "***.***(@@@)": "From *** get the *** function, call it with params self, @@@.", "***.*** = '***'": "From *** get the *** attribute and set it to '***'." } # drill phrases first? if len(sys.argv) == 2 and sys.argv[1] == "english": phrase_first = True else: phrase_first = False # load words from the website for word in urlopen(word_url).readlines(): words.append(str(word.strip(), encoding="utf-8")) def convert(snippet,phrase): class_names = [w.capitalize() for w in random.sample(words, snippet.count("%%%"))] other_names = random.sample(words, snippet.count("***")) results = [] param_names = [] for i in range(0, snippet.count("@@@")): param_count = random.randint(1,3) param_names.append(', '.join( random.sample(words, param_count)))
}

x = requests.post(url, data=objeto)
print(x.text)

'''---------------------------------------------------------------'''

today = date.today()
d1 = today.strftime("%d-%m-%Y")
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'
}
req = Request(url='http://www.elcomparador.com/futbol/' + str(d1),
              headers=headers)
html = urlopen(req).read()
soup2 = BeautifulSoup(html)
contSurebets = 0
contPartidos = 0
tomorrow = today + datetime.timedelta(days=1)
print(tomorrow)
div_contenedor = soup2.find("div", {"id": "contenedor_lista_partidos"})
div_partido = div_contenedor.findAll("div", {"id": "contenedor_evento"})
for partido in div_partido:
    fila_evento = partido.find("div", {"id": "fila_evento"})
    celda_evento_fecha = fila_evento.find("div", {"id": "celda_evento_fecha"})
    horas = celda_evento_fecha.findAll("span", {"class": "hora"})
    hora_text = ''
    for hora in horas:
        hora_text = hora.text
# -*- coding: utf-8 -*-
from urllib.request import urlopen  # urlopen opens a URL and returns a response object to work with

resp = urlopen(
    'https://en.wikipedia.org/wiki/List_of_bicycle-sharing_systems#Cities')
html_data = resp.read().decode('utf-8')
print(html_data)
def leer():
    today = date.today()
    d1 = today.strftime("%d-%m-%Y")
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'
    }
    req = Request(url='http://www.elcomparador.com/futbol/' + str(d1),
                  headers=headers)
    html = urlopen(req).read()
    soup2 = BeautifulSoup(html)
    contSurebets = 0
    contPartidos = 0
    tomorrow = today + datetime.timedelta(days=1)
    print(tomorrow)
    div_contenedor = soup2.find("div", {"id": "contenedor_lista_partidos"})
    div_partido = div_contenedor.findAll("div", {"id": "contenedor_evento"})
    for partido in div_partido:
        fila_evento = partido.find("div", {"id": "fila_evento"})
        celda_evento_fecha = fila_evento.find("div", {"id": "celda_evento_fecha"})
        horas = celda_evento_fecha.findAll("span", {"class": "hora"})
        hora_text = ''
        for hora in horas:
            hora_text = hora.text
        celda_evento_partido = fila_evento.find("div", {"id": "celda_evento_partido"})
        franja_equipos = celda_evento_partido.findAll("span", {"class": "equipo"})
        contNombre = 0
        team1 = ''
        team2 = ''
        odd1 = 0
        odd2 = 0
        odd3 = 0
        bookie1 = ''
        bookie1_id = 0
        bookie2 = ''
        bookie2_id = 0
        bookie3 = ''
        bookie3_id = 0
        bookie = ''
        bookieId = 0
        celda_evento_cuotas = fila_evento.find("div", {"id": "celda_evento_cuotas"})
        contenedor_cuotas = celda_evento_cuotas.findAll(
            "div", {"id": "contenedor_cuotas"})
        for contenedor_cuota in contenedor_cuotas:
            fila_cuotas = contenedor_cuota.findAll("div", {"id": "fila_cuotas"})
            contCuota = 0
            for fila_cuota in fila_cuotas:
                celda_cuotas = fila_cuota.find("div", {"class": "verde"})
                if celda_cuotas is not None:
                    a_link = celda_cuotas.find('a')
                    link = a_link['href']
                    if "bet365" in link:
                        bookie = "bet365"
                        bookieId = "Bet365"
                    if "codere" in link:
                        bookie = "codere"
                        bookieId = "Codere"
                    if "bwin" in link:
                        bookie = "bwin"
                        bookieId = "BWin"
                    if "marathonbet" in link:
                        bookie = "marathon bet"
                        bookieId = "MarathonBet"
                    if "luckia" in link:
                        bookie = "luckia"
                        bookieId = "Luckia"
                    if "sportium" in link:
                        bookie = "sportium"
                        bookieId = "Sportium"
                    if "betway" in link:
                        bookie = "betway"
                        bookieId = "Betway"
                    if "marcaapuestas" in link:
                        bookie = "marca apuestas"
                        bookieId = "MarcarApuestas"
                    if "willhill" in link:
                        bookie = "william hill"
                        bookieId = "WilliamHill"
                    if "sport888" in link:
                        bookie = "888 sport"
                        bookieId = "888Sport"
                    if "betfair" in link:
                        bookie = "betfair"
                        bookieId = "Betfair"
                    if "interwetten" in link:
                        bookie = "interwetten"
                        bookieId = "Interwetten"
                    if contCuota == 0:
                        odd1 = float(celda_cuotas.text)
                        bookie1 = bookie
                        bookie1_id = bookieId
                    if contCuota == 1:
                        odd2 = float(celda_cuotas.text)
                        bookie2 = bookie
                        bookie2_id = bookieId
                    if contCuota == 2:
                        odd3 = float(celda_cuotas.text)
                        bookie3 = bookie
                        bookie3_id = bookieId
                contCuota = contCuota + 1
        for equipo in franja_equipos:
            if contNombre == 0:
                team1 = equipo.text
            if contNombre == 1:
                team2 = equipo.text
            contNombre = contNombre + 1
        if hora_text != '':
            if odd1 != 0 and odd2 != 0 and odd3 != 0:
                contPartidos = contPartidos + 1
                percentage = (1 / odd1) + (1 / odd2) + (1 / odd3)
                if percentage < 1:
                    contPartidos = contPartidos + 1
                    match = team1 + " vs " + team2
                    percentage = percentage * 100
                    percentage = 100 - percentage
                    print(match + " " + str(percentage))
                    print(bookie1 + " " + str(odd1))
                    print(bookie2 + " " + str(odd2))
                    print(bookie3 + " " + str(odd3))
                    enviarPost(match, d1, team1, team2, odd1, odd2, odd3,
                               bookie1_id, bookie2_id, bookie3_id, percentage, today)
    print("bucle")
def cuotasFutbol(link):
    arrayLink = link.split("/")
    sport = arrayLink[5]
    country = arrayLink[4]
    league = arrayLink[6]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}
    req = Request(
        url=link,
        headers=headers)
    html = urlopen(req).read()
    soup2 = BeautifulSoup(html)
    content = soup2.find("div", {"id": "content"})
    main_column = content.find("div", {"class": "main-column"})
    article = main_column.find("article")
    matches_table = article.find("table")
    tbody = matches_table.find("tbody")
    trs = tbody.findAll("tr")
    for tr in trs:
        tds = tr.findAll("td")
        cont = 0
        fecha = ''
        hora = ''
        match = ''
        cuota1 = 0
        cuota2 = 0
        cuota3 = 0
        casa1 = ''
        casa2 = ''
        casa3 = ''
        for td in tds:
            if cont == 0:
                fecha = td.find("span", {"class": "date"})
                hora = td.find("span", {"class": "time"})
                if fecha is not None:
                    fecha = fecha.text
                    hora = hora.text
            if cont == 1:
                partido = td.find("a")
                if partido is not None:
                    match = partido.text.strip()
            if cont == 2:
                cuota1 = td.find("span")
                if cuota1 is not None:
                    casa1 = td.find("img")
                    cuota1 = float(cuota1.text)
                    casa1 = casa1["alt"]
            if cont == 3:
                cuota2 = td.find("span")
                if cuota2 is not None:
                    casa2 = td.find("img")
                    cuota2 = float(cuota2.text)
                    casa2 = casa2["alt"]
            if cont == 4:
                cuota3 = td.find("span")
                if cuota3 is not None:
                    casa3 = td.find("img")
                    cuota3 = float(cuota3.text)
                    casa3 = casa3["alt"]
            if cuota1 is not None and cuota2 is not None and cuota3 is not None:
                if cuota1 > 0 and cuota2 > 0 and cuota3 > 0:
                    percentage = (1/cuota1)+(1/cuota2)+(1/cuota3)
                    arrayFecha = fecha.split(" ")
                    mes = 0
                    dia = 0
                    ano = 0
                    if arrayFecha[1] == "Feb":
                        mes = "02"
                        dia = int(arrayFecha[0].replace(",", ""))
                        ano = int(arrayFecha[2])
                    fecha = str(ano)+"-"+str(mes)+"-"+str(dia)+" "+str(hora)
                    equipoarray = match.split(" — ")
                    team1 = equipoarray[0]
                    team2 = equipoarray[1]
                    odd1 = cuota1
                    odd2 = cuota2
                    odd3 = cuota3
                    bookie1 = casa1
                    bookie2 = casa2
                    bookie3 = casa3
                    if bookie1 == "William Hill":
                        bookie1 = "WilliamHill"
                    if bookie2 == "William Hill":
                        bookie2 = "WilliamHill"
                    if bookie3 == "William Hill":
                        bookie3 = "WilliamHill"
                    enviarPost(match, fecha, team1, team2, odd1, odd2, odd3,
                               bookie1, bookie2, bookie3, percentage, sport, country, league)
            cont = cont+1
import numpy as np
from urllib.request import urlopen

# url with dataset
url = "http://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data"
# download the file
raw_data = urlopen(url)
# load the CSV file as a numpy matrix
dataset = np.loadtxt(raw_data, delimiter=",")
# separate the data from the target attributes
X = dataset[:, 0:7]
y = dataset[:, 8]
print(raw_data)
f = open('pets.txt', 'r')
pets = json.loads(f.read())
f.close()
pprint(pets)

########################################
# Read page with a slice
from urllib2 import urlopen

# Add your code here!
website = urlopen('http://placekitten.com/')
kittens = website.read()
print kittens[559:1000]

########################################
# Poll NPR by story ID# and print story titles
from urllib2 import urlopen
from json import load

url = "http://api.npr.org/query?apiKey="
key = "API_KEY"
from urllib.request import urlopen

html = urlopen("http://www.baidu.com")
print('hello world')
print(html.read())
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import requests

url = "https://www.americanas.com.br/"
html = urlopen("http://www.pythonscraping.com/pages/page3.html")

# request = requests.get(url)
# soup4 = BeautifulSoup(request.text, 'lxml')
# verifyId = soup4.find('div', id='sas_30352')
# verifyClass = soup4.find('div', class_="card-product-image placeholder picture")

bsObj = BeautifulSoup(html)
images = bsObj.findAll("img", {"src": re.compile("\.\.\/img\/gifts/img.*\.jpg")})
for image in images:
    print(image["src"])
# print(verifyClass.split())  # verifyClass is only defined in the commented-out block above