def setDictionary(word):
    try:
        api_key = '750c98f0-f83f-4604-a78d-9065f53e5804'
        url = "https://www.dictionaryapi.com/api/v3/references/sd2/json/" + word.lower() + "?key=" + api_key
        resp = reqGet(url)
        data = jloads(resp.text)
        defis = data[0]['def'][0]['sseq']
        count = 0
        file = open(savedir + "\\dictionary.txt", "w")
        for defi in defis:
            count += 1
            if count >= 4:
                break
            # Strip Merriam-Webster formatting tokens from the definition text
            text = defi[0][1]['dt'][0]
            text = text[1][4:] + "."
            text = text.replace("{it}", "\"")
            text = text.replace("{/it}", "\"")
            text = text.replace("{bc}", "")
            text = text.replace("{sx", "")
            text = text.replace("|}", "")
            text = text.replace("}", "|")
            file.write("=> " + text + " #&# ")
        file.write("< " + data[0]['fl'] + " > #&# ")
        file.close()
        global c
        c = '+'
    except:
        file = open(savedir + "\\dictionary.txt", "w")
        file.write("Oops! Mr.Tsaurus could not find the word! #&# ")
        file.close()
def setImage(word):  #---------------#
    try:
        img_url = ""
        img_url = "https://www.pixabay.com/en/photos/" + word + "/"
        htmlContents = reqGet(img_url).content
        soup = BeautifulSoup(htmlContents, 'lxml')
        image_tags = soup.find_all('img')
        count = 0
        file = open(savedir + "\\imageurls.txt", "w")
        for image_tag in image_tags:
            if count >= 4:
                break
            img_src = image_tag.get('src')
            # Keep only real photo URLs, skipping SVG placeholders and GIFs
            if ('svg' not in img_src) and ('gif' not in img_src):
                file.write(img_src + " #&# ")
                count += 1
        if count < 4:
            # Pad with the local error image so there are always four entries
            rem = 4 - count
            for i in range(rem):
                file.write(savedir + "\\errorImage.jpg" + " #&# ")
        file.close()
        global a
        a = '+'
    except:
        file = open(savedir + "\\imageurls.txt", "w")
        for i in range(5):
            file.write(savedir + "\\errorImage.jpg" + " #&# ")
        file.close()
def setImage(word):  #---------------#
    try:
        img_url = ""
        img_url = "https://www.pixabay.com/en/photos/" + word + "/"
        htmlContents = reqGet(img_url).content
        soup = BeautifulSoup(htmlContents, 'lxml')
        image_tags = soup.find_all('img')
        count = 0
        # file = open(savedir + "\\imageurls.txt", "w")
        file = open("D:\\tsaurus\\Tsaurus-teaching-assistant-for-Kids-using-OCR-and-webscrapping\\temp" + "\\imageurls.txt", "w")
        for image_tag in image_tags:
            if count >= 4:
                break
            img_src = image_tag.get('src')
            print(img_src)
            if ('svg' not in img_src) and ('gif' not in img_src):
                file.write(img_src + " #&# ")
                db.set_img_url(img_src + " #&# ")
                count += 1
        if count < 4:
            rem = 4 - count
            for i in range(rem):
                file.write(savedir + "\\errorImage.jpg" + " #&# ")
                db.set_img_url(savedir + "\\errorImage.jpg" + " #&# ")
        file.close()
        global a
        a = '+'
    except:
        file = open(savedir + "\\imageurls.txt", "w")
        for i in range(5):
            file.write(savedir + "\\errorImage.jpg" + " #&# ")
        file.close()
def setDictionary(word):
    try:
        api_key = '750c98f0-f83f-4604-a78d-9065f53e5804'
        url = "https://www.dictionaryapi.com/api/v3/references/sd2/json/" + word.lower() + "?key=" + api_key
        resp = reqGet(url)
        data = jloads(resp.text)
        if data:
            global c
            c = '+'  # status
            defis = data[0]['def'][0]['sseq']
            count = 0
            dict_string = ""
            for defi in defis:
                count += 1
                if count >= 4:
                    break
                text = defi[0][1]['dt'][0]
                text = text[1][4:] + "."
                text = text.replace("{it}", "\"")
                text = text.replace("{/it}", "\"")
                text = text.replace("{bc}", " ")
                text = text.replace("{sx", " ")
                text = text.replace("|}", " ")
                text = text.replace("}", " ")
                text = sub('[' + punctuation + ']', ' ', text)
                text = sub('[0-9]+', ' ', text)
                dict_string += str("=> " + text + "<br>")
            dict_string += str("< " + data[0]['fl'] + " > <br> ")
            db.set_dic_text(dict_string)
            db.set_dic_partOfSpeech(data[0]['fl'])
    except:
        pass
def setImage(word):  #---------------#
    try:
        img_url = ""
        img_url = "https://www.pixabay.com/en/photos/" + word + "/"
        htmlContents = reqGet(img_url).content
        soup = BeautifulSoup(htmlContents, 'lxml')
        image_tags = soup.find_all('img')
        count = 0
        if len(image_tags):
            global a
            a = '+'
        for image_tag in image_tags:
            if count >= 6:
                break
            img_src = image_tag.get('src')
            if ('svg' not in img_src) and ('gif' not in img_src):
                db.set_img_url(img_src)
                count += 1
        if count < 6:
            rem = 6 - count
            for i in range(rem):
                img_src = savedir + "\\errorImage.jpg"
                db.set_img_url(img_src)
    except:
        for i in range(6):
            error_img_src = savedir + "\\errorImage.jpg"
            db.set_img_url(error_img_src)
def setDescription(word):  #--------------#
    try:
        url = ""
        url = 'https://www.britannica.com/search?query=' + word
        content = reqGet(url).content
        soup = BeautifulSoup(content, 'lxml')
        div_tag = soup.find("li", class_="mb-45")
        a = div_tag.find('a')
        a['href'] = "https://www.britannica.com" + a['href']
        db.set_description(str(div_tag))
        global b
        b = '+'
    except:
        # "Oops! Mr. Taurus could not find the word in encyclopedia ! <br> ")
        pass
def getAPOD() -> str:
    print(" apodwallpaper.py -> getAPOD - Downloading APOD image URL")
    photo = None
    # Used DEMO_KEY as the api_key since the constraints are based on IP
    response = reqGet('https://api.nasa.gov/planetary/apod', params={'api_key': 'DEMO_KEY'})
    # Status code check
    if response.status_code == 200:
        print(" apodwallpaper.py -> getAPOD - Download successful")
        photo = response.json()['hdurl']
    else:
        print(" apodwallpaper.py -> getAPOD - ERROR: Can't get the APOD image URL")
        print(" apodwallpaper.py -> getAPOD - ERROR CODE:", response.status_code)
    return photo
def setDescription(word):  #--------------#
    try:
        url = ""
        url = 'https://www.britannica.com/search?query=' + word
        content = reqGet(url).content
        soup = BeautifulSoup(content, 'lxml')
        div_tag = soup.find("li", class_="mb-45")
        a = div_tag.find('a')
        a['href'] = "https://www.britannica.com" + a['href']
        file = open(savedir + "\\description.txt", "w")
        file.write(str(div_tag))
        file.close()
        global b
        b = '+'
    except:
        file = open(savedir + "\\description.txt", "w")
        file.write("Oops! Mr. Taurus could not find the word in encyclopedia ! #&# ")
        file.close()
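# Usage sketch (an assumption, not part of the original code): a hypothetical
# lookupWord() helper that runs the three scrapers above and reports which of
# the module-level status flags a / b / c were set to '+'. The '-' default used
# for resetting the flags is also an assumption.
def lookupWord(word):
    global a, b, c
    a = b = c = '-'        # reset status flags before each lookup
    setImage(word)         # image URLs for the word
    setDescription(word)   # encyclopedia entry
    setDictionary(word)    # dictionary definitions and part of speech
    # '+' means the corresponding scraper succeeded for this word
    return {'images': a, 'description': b, 'dictionary': c}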
def monitor(self):
    while self.continueMonitor:
        sleep(60)
        newbs = BeautifulSoup(reqGet(self.url).text, 'html.parser')
        newbsTags = newbs.find_all()
        ### If two tags have at least one child that is not a NavigableString and share the same
        ### name and attributes, we do not compare them further: any change must have happened
        ### somewhere deeper, and that deeper change will be reported instead
        i = 0
        while i < len(self.bsTags):
            j = 0
            while j < len(newbsTags):
                nonNavigChildsTag = list(
                    filter(lambda x: not (type(x) == NavigableString), self.bsTags[i].contents))
                nonNavigChildsNewtag = list(
                    filter(lambda x: not (type(x) == NavigableString), newbsTags[j].contents))
                ### same number of non-NavigableString children, and that number > 0
                if len(nonNavigChildsTag) == len(nonNavigChildsNewtag):
                    if (len(nonNavigChildsTag) == 0) and self.bsTags[i] != newbsTags[j]:
                        j += 1
                        continue
                    elif (self.bsTags[i].name != newbsTags[j].name
                          or self.bsTags[i].attrs != newbsTags[j].attrs):
                        j += 1
                        continue
                    # Matching tags appear in both snapshots: drop them from both lists
                    del self.bsTags[i]
                    i -= 1
                    del newbsTags[j]
                    break
                j += 1
            i += 1
        for el in self.bsTags:
            print(f"Removed: {el.text}")
        for el in newbsTags:
            print(f"Added: {el.text}")
        self.bsObj = newbs
        self.bsTags = newbsTags
def downloadAPOD(apodURL, apodPath) -> int:
    print(" apodwallpaper.py -> downloadAPOD - Downloading APOD image")
    result = 0
    # Download APOD image
    response = reqGet(apodURL)
    if response.status_code == 200:
        print(" apodwallpaper.py -> downloadAPOD - Download successful")
        # Write new image (overwriting the older one)
        with open(apodPath, 'wb') as apodImage:
            apodImage.write(response.content)
            apodImage.truncate()
    else:
        # Error occurred => Print error messages
        result = 1
        print(" apodwallpaper.py -> downloadAPOD - ERROR: Download of APOD image failed")
        print(" apodwallpaper.py -> downloadAPOD - ERROR CODE:", response.status_code)
    return result
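# Usage sketch (an assumption, not in the original script): tie getAPOD() and
# downloadAPOD() together. The target filename 'apod.jpg' is a placeholder.
def updateAPOD(apodPath='apod.jpg') -> int:
    apodURL = getAPOD()
    if apodURL is None:
        return 1  # could not obtain the image URL
    return downloadAPOD(apodURL, apodPath)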
def setDescription(word):  #--------------#
    try:
        url = ""
        url = 'https://www.britannica.com/search?query=' + word
        content = reqGet(url).content
        soup = BeautifulSoup(content, 'lxml')
        div_tag = soup.find("li", class_="mb-45")
        a = div_tag.find('a')
        a['href'] = "https://www.britannica.com" + a['href']
        # file = open(savedir + "\\description.txt", "w")
        file = open("D:\\tsaurus\\Tsaurus-teaching-assistant-for-Kids-using-OCR-and-webscrapping\\temp" + "\\description.txt", "w")
        file.write(str(div_tag))
        # print("\n" + "this is what you are looking for ===>>>>>>" + str(div_tag))
        # print("end of printing")
        db.set_description(str(div_tag))
        file.close()
        global b
        b = '+'
    except:
        file = open(savedir + "\\description.txt", "w")
        file.write("Oops! Mr. Taurus could not find the word in encyclopedia ! #&# ")
        file.close()
def setDictionary(word):
    try:
        api_key = '750c98f0-f83f-4604-a78d-9065f53e5804'
        url = "https://www.dictionaryapi.com/api/v3/references/sd2/json/" + word.lower() + "?key=" + api_key
        resp = reqGet(url)
        data = jloads(resp.text)
        defis = data[0]['def'][0]['sseq']
        count = 0
        # file = open(savedir + "\\dictionary.txt", "w")
        file = open("D:\\tsaurus\\Tsaurus-teaching-assistant-for-Kids-using-OCR-and-webscrapping\\temp" + "\\dictionary.txt", "w")
        for defi in defis:
            count += 1
            if count >= 4:
                break
            text = defi[0][1]['dt'][0]
            text = text[1][4:] + "."
            text = text.replace("{it}", "\"")
            text = text.replace("{/it}", "\"")
            text = text.replace("{bc}", "")
            text = text.replace("{sx", "")
            text = text.replace("|}", "")
            text = text.replace("}", "|")
            file.write("=> " + text + " #&# ")
            # global db
            db.set_dic_text("=>" + text + " #&# ")
            # print( text + " <=text" )
        file.write("< " + data[0]['fl'] + " > #&# ")
        # print( data[0]['fl'] + " <=dara[0]" )
        # global db
        db.set_dic_parOfSpeech("< " + data[0]['fl'] + " > #&# ")
        file.close()
        global c
        c = '+'
    except:
        x_ferb = 1
# A_Z word list url acquire
import requests
import bs4
from requests import get as reqGet
from bs4 import BeautifulSoup as soup

headers = {'Authorization': 'Basic ZW5nbGlzaHByb2ZpbGU6dm9jYWJ1bGFyeQ=='}
wordList = 'http://vocabulary.englishprofile.org/dictionary//word-list/uk/c2/A'
responWL = reqGet(wordList, headers=headers)
page_html_WL = responWL.content

# parse the html
page_soup_WL = soup(page_html_WL, 'html.parser')

# get the urls in tags such as: <span><a href="/dictionary/word-list/uk/c2/A" title="A">A</a></span>
urlA_Z = page_soup_WL.find('div', {'id': 'letters'})
urlA_Zurl = urlA_Z.find_all('a')
print(urlA_Zurl)

UrlSet = list()
for index in range(len(urlA_Zurl)):
    aLineEle = urlA_Zurl[index]
    UrlEle = aLineEle['href']
    UrlSet.append(UrlEle)
print(UrlSet)

# write the urls into a file
import pickle
pkF = open(r'C:\myfirstgrab\picUrlSet.pk', 'wb')
pickle.dump(UrlSet, pkF)
pkF.close()
# good for now
# urls of ranges of initial letters
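# Follow-up sketch (an assumption, not in the original script): load the pickled
# letter URLs back and turn the relative hrefs (e.g. "/dictionary/word-list/uk/c2/A")
# into absolute URLs. The base URL is inferred from the wordList URL above.
import pickle
with open(r'C:\myfirstgrab\picUrlSet.pk', 'rb') as pkF:
    letterUrls = pickle.load(pkF)
fullUrls = ['http://vocabulary.englishprofile.org' + u for u in letterUrls]
print(fullUrls)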
def setup(self):
    self.bsObj = BeautifulSoup(reqGet(self.url).text, 'html.parser')
    self.bsTags = self.bsObj.find_all()
def addSiteToLookAfter(self, site):
    self.sitesToWatch[site] = BeautifulSoup(reqGet(site).text, 'html.parser')
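# Usage sketch (an assumption): monitor(), setup() and addSiteToLookAfter() above
# read like methods of a page-watcher class that is not shown here. 'PageWatcher',
# its constructor, and the attribute defaults are hypothetical.
class PageWatcher:
    def __init__(self, url):
        self.url = url                # page to watch
        self.continueMonitor = True   # monitor() loops while this is True
        self.sitesToWatch = {}        # site -> last parsed BeautifulSoup
        self.setup()                  # take the initial snapshot of the page

    # Reuse the module-level functions above as methods of this class
    setup = setup
    monitor = monitor
    addSiteToLookAfter = addSiteToLookAfter

# PageWatcher('https://example.com').monitor()  # reports added/removed tags every 60 s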