def setDictionary(word):
    try:
        api_key = '750c98f0-f83f-4604-a78d-9065f53e5804'
        url = ("https://www.dictionaryapi.com/api/v3/references/sd2/json/"
               + word.lower() + "?key=" + api_key)
        resp = reqGet(url)
        data = jloads(resp.text)
        defis = data[0]['def'][0]['sseq']
        count = 0
        file = open(savedir + "\\dictionary.txt", "w")

        # Write up to three sense definitions, stripping Merriam-Webster
        # formatting tokens such as {it}, {bc} and {sx ...}.
        for defi in defis:
            count += 1
            if count >= 4:
                break
            text = defi[0][1]['dt'][0]
            text = text[1][4:] + "."
            text = text.replace("{it}", "\"")
            text = text.replace("{/it}", "\"")
            text = text.replace("{bc}", "")
            text = text.replace("{sx", "")
            text = text.replace("|}", "")
            text = text.replace("}", "|")
            file.write("=> " + text + " #&#  ")
        file.write("< " + data[0]['fl'] + " > #&#  ")  # part of speech
        file.close()
        global c
        c = '+'  # success flag
    except Exception:
        file = open(savedir + "\\dictionary.txt", "w")
        file.write("Oops! Mr.Tsaurus could not find the word! #&#  ")
        file.close()
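# The Tsaurus snippets in this file rely on names imported elsewhere in the
# project. A minimal assumed setup (module-level values and the initial flag
# values are assumptions, not the project's actual file):
from requests import get as reqGet
from json import loads as jloads
from re import sub
from string import punctuation
from bs4 import BeautifulSoup

savedir = "temp"   # directory holding the generated .txt files (assumed)
a = b = c = '-'    # '+' marks success for images / description / dictionary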
def setImage(word):  #---------------#
    try:
        img_url = "https://www.pixabay.com/en/photos/" + word + "/"
        htmlContents = reqGet(img_url).content
        soup = BeautifulSoup(htmlContents, 'lxml')
        image_tags = soup.find_all('img')
        count = 0
        file = open(savedir + "\\imageurls.txt", "w")
        # Keep the first four non-SVG, non-GIF image sources.
        for image_tag in image_tags:
            if count >= 4:
                break
            img_src = image_tag.get('src')
            if ('svg' not in img_src) and ('gif' not in img_src):
                file.write(img_src + " #&#  ")
                count += 1
        # Pad with a local placeholder if fewer than four images were found.
        if count < 4:
            rem = 4 - count
            for i in range(rem):
                file.write(savedir + "\\errorImage.jpg" + " #&#  ")
        file.close()
        global a
        a = '+'  # success flag
    except Exception:
        file = open(savedir + "\\imageurls.txt", "w")
        for i in range(5):
            file.write(savedir + "\\errorImage.jpg" + " #&#  ")
        file.close()
def setImage(word):        #---------------#
    try:
        img_url = "https://www.pixabay.com/en/photos/" + word + "/"
        htmlContents = reqGet(img_url).content
        soup = BeautifulSoup(htmlContents, 'lxml')
        image_tags = soup.find_all('img')
        count = 0
        file = open("D:\\tsaurus\\Tsaurus-teaching-assistant-for-Kids-using-OCR-and-webscrapping\\temp"
                    + "\\imageurls.txt", "w")
        # Keep the first four non-SVG, non-GIF image sources, mirroring each
        # URL into both the text file and the db helper.
        for image_tag in image_tags:
            if count >= 4:
                break
            img_src = image_tag.get('src')
            print(img_src)
            if ('svg' not in img_src) and ('gif' not in img_src):
                file.write(img_src + " #&#  ")
                db.set_img_url(img_src + " #&#  ")
                count += 1
        if count < 4:
            rem = 4 - count
            for i in range(rem):
                file.write(savedir + "\\errorImage.jpg" + " #&#  ")
                db.set_img_url(savedir + "\\errorImage.jpg" + " #&#  ")
        file.close()
        global a
        a = '+'  # success flag
    except Exception:
        file = open(savedir + "\\imageurls.txt", "w")
        for i in range(5):
            file.write(savedir + "\\errorImage.jpg" + " #&#  ")
        file.close()
def setDictionary(word):
    try:
        api_key = '750c98f0-f83f-4604-a78d-9065f53e5804'
        url = ("https://www.dictionaryapi.com/api/v3/references/sd2/json/"
               + word.lower() + "?key=" + api_key)
        resp = reqGet(url)
        data = jloads(resp.text)
        if data:
            global c
            c = '+'  # status flag: a non-empty response counts as success
        defis = data[0]['def'][0]['sseq']
        count = 0
        dict_string = ""
        # Collect up to three definitions, stripping Merriam-Webster markup
        # tokens plus any leftover punctuation and digits.
        for defi in defis:
            count += 1
            if count >= 4:
                break
            text = defi[0][1]['dt'][0]
            text = text[1][4:] + "."
            text = text.replace("{it}", "\"")
            text = text.replace("{/it}", "\"")
            text = text.replace("{bc}", " ")
            text = text.replace("{sx", " ")
            text = text.replace("|}", " ")
            text = text.replace("}", " ")
            text = sub('[' + punctuation + ']', ' ', text)
            text = sub('[0-9]+', ' ', text)
            dict_string += "=> " + text + "<br>"
        dict_string += "< " + data[0]['fl'] + " >  <br> "
        db.set_dic_text(dict_string)
        db.set_dic_partOfSpeech(data[0]['fl'])
    except Exception:
        pass
def setImage(word):  #---------------#
    try:
        img_url = "https://www.pixabay.com/en/photos/" + word + "/"
        htmlContents = reqGet(img_url).content
        soup = BeautifulSoup(htmlContents, 'lxml')
        image_tags = soup.find_all('img')
        count = 0
        if len(image_tags):
            global a
            a = '+'  # status flag: any <img> tag counts as success
        # Keep the first six non-SVG, non-GIF image sources.
        for image_tag in image_tags:
            if count >= 6:
                break
            img_src = image_tag.get('src')
            if ('svg' not in img_src) and ('gif' not in img_src):
                db.set_img_url(img_src)
                count += 1
        # Pad with a local placeholder if fewer than six images were found.
        if count < 6:
            rem = 6 - count
            for i in range(rem):
                img_src = savedir + "\\errorImage.jpg"
                db.set_img_url(img_src)
    except Exception:
        for i in range(6):
            error_img_src = savedir + "\\errorImage.jpg"
            db.set_img_url(error_img_src)
def setDescription(word):  #--------------#
    try:
        url = 'https://www.britannica.com/search?query=' + word
        content = reqGet(url).content
        soup = BeautifulSoup(content, 'lxml')
        # First search result on the Britannica results page.
        div_tag = soup.find("li", class_="mb-45")
        a_tag = div_tag.find('a')
        # Make the relative result link absolute.
        a_tag['href'] = "https://www.britannica.com" + a_tag['href']
        db.set_description(str(div_tag))
        global b
        b = '+'  # success flag
    except Exception:
        # "Oops! Mr. Taurus could not find the word in encyclopedia !"
        pass
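# The db object used by the variants above is defined elsewhere in the
# project; only its method names can be inferred from the calls. A
# hypothetical stand-in for running these functions in isolation:
class _StubDB:
    """Collects the values the Tsaurus helpers push; storage is assumed."""

    def __init__(self):
        self.img_urls = []
        self.dic_text = None
        self.dic_partOfSpeech = None
        self.description = None

    def set_img_url(self, url):
        self.img_urls.append(url)

    def set_dic_text(self, text):
        self.dic_text = text

    def set_dic_partOfSpeech(self, pos):
        self.dic_partOfSpeech = pos

    def set_description(self, html):
        self.description = html

db = _StubDB()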
def getAPOD() -> str:
    print(" apodwallpaper.py -> getAPOD - Downloading APOD image URL")
    photo = None
    # Used DEMO_KEY as the api_key since the constraints are based on IP
    response = reqGet('https://api.nasa.gov/planetary/apod',
                      params={'api_key': 'DEMO_KEY'})
    # Status code check
    if response.status_code == 200:
        print(" apodwallpaper.py -> getAPOD - Download successful")
        photo = response.json()['hdurl']
    else:
        print(
            " apodwallpaper.py -> getAPOD - ERROR: Can't get the APOD image URL"
        )
        print(" apodwallpaper.py -> getAPOD - ERROR CODE:",
              response.status_code)
    return photo
def setDescription(word):  #--------------#
    try:
        url = 'https://www.britannica.com/search?query=' + word
        content = reqGet(url).content
        soup = BeautifulSoup(content, 'lxml')
        # First search result on the Britannica results page.
        div_tag = soup.find("li", class_="mb-45")
        a_tag = div_tag.find('a')
        # Make the relative result link absolute.
        a_tag['href'] = "https://www.britannica.com" + a_tag['href']
        file = open(savedir + "\\description.txt", "w")
        file.write(str(div_tag))
        file.close()
        global b
        b = '+'  # success flag
    except Exception:
        file = open(savedir + "\\description.txt", "w")
        file.write(
            "Oops! Mr. Taurus could not find the word in encyclopedia ! #&#  ")
        file.close()
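# All file-writing variants above use " #&#  " as a record delimiter. A
# minimal sketch for reading such a file back (read_records is a helper
# introduced here, not part of the project):
def read_records(path):
    # Split on the "#&#" token and drop empty trailing fields.
    with open(path) as f:
        return [part.strip() for part in f.read().split("#&#") if part.strip()]

# e.g. read_records(savedir + "\\dictionary.txt")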
Example #9
    def monitor(self):
        while self.continueMonitor:
            sleep(60)

            newbs = BeautifulSoup(reqGet(self.url).text, 'html.parser')
            newbsTags = newbs.find_all()

            ### If two tags have some non-NavigableString child and the same
            ### name and attributes, we do not compare them here: the change
            ### must have happened lower down and will be reported there.
            i = 0
            while i < len(self.bsTags):
                j = 0
                while j < len(newbsTags):
                    nonNavigChildsTag = list(
                        filter(lambda x: not isinstance(x, NavigableString),
                               self.bsTags[i].contents))
                    nonNavigChildsNewtag = list(
                        filter(lambda x: not isinstance(x, NavigableString),
                               newbsTags[j].contents))
                    ### the same number of non-NavigableString children (and that number > 0)
                    if len(nonNavigChildsTag) == len(nonNavigChildsNewtag):
                        # Leaf tags: compare by value (Tag.__eq__); identity
                        # (`is`) can never match across two separate parses.
                        if (len(nonNavigChildsTag)
                                == 0) and self.bsTags[i] != newbsTags[j]:
                            j += 1
                            continue
                        elif (self.bsTags[i].name != newbsTags[j].name
                              or self.bsTags[i].attrs != newbsTags[j].attrs):
                            j += 1
                            continue
                        del self.bsTags[i]
                        i -= 1
                        del newbsTags[j]
                        break
                    j += 1
                i += 1

            for el in self.bsTags:
                print(f"Removed: {el.text}")
            for el in newbsTags:
                print(f"Added: {el.text}")
            self.bsObj = newbs
            self.bsTags = newbsTags
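# A hypothetical wrapper showing how setup() (Example #14 below) and the
# monitor() loop above could be wired together; the class name, constructor,
# and stop-flag handling are assumptions:
from time import sleep
from requests import get as reqGet
from bs4 import BeautifulSoup, NavigableString

class PageWatcher:
    def __init__(self, url):
        self.url = url
        self.continueMonitor = True
        self.setup()

    def setup(self):
        # Parse the page once so monitor() has a baseline to diff against.
        self.bsObj = BeautifulSoup(reqGet(self.url).text, 'html.parser')
        self.bsTags = self.bsObj.find_all()

    # monitor(self) as defined above would go here

# watcher = PageWatcher('https://example.com')
# watcher.monitor()   # polls every 60 s and prints added/removed tags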
def downloadAPOD(apodURL, apodPath) -> int:
    print(" apodwallpaper.py -> downloadAPOD - Downloading APOD image")
    result = 0
    # Download APOD image
    response = reqGet(apodURL)
    if response.status_code == 200:
        print(" apodwallpaper.py -> downloadAPOD - Download successful")
        # Write the new image; mode 'wb' already truncates the older file
        with open(apodPath, 'wb') as apodImage:
            apodImage.write(response.content)
    else:
        # Error occurred => Print error messages
        result = 1
        print(
            " apodwallpaper.py -> downloadAPOD - ERROR: Download of APOD image failed"
        )
        print(" apodwallpaper.py -> downloadAPOD - ERROR CODE:",
              response.status_code)
    return result
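# The two APOD helpers form a small pipeline; a usage sketch (the target
# file path is an assumption):
apodURL = getAPOD()
if apodURL is not None:
    if downloadAPOD(apodURL, 'apod_wallpaper.jpg') == 0:
        print(" apodwallpaper.py -> main - APOD image saved")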
def setDescription(word):      #--------------#
    try:
        url = 'https://www.britannica.com/search?query=' + word
        content = reqGet(url).content
        soup = BeautifulSoup(content, 'lxml')
        # First search result on the Britannica results page.
        div_tag = soup.find("li", class_="mb-45")
        a_tag = div_tag.find('a')
        a_tag['href'] = "https://www.britannica.com" + a_tag['href']
        file = open("D:\\tsaurus\\Tsaurus-teaching-assistant-for-Kids-using-OCR-and-webscrapping\\temp"
                    + "\\description.txt", "w")
        file.write(str(div_tag))
        db.set_description(str(div_tag))
        file.close()
        global b
        b = '+'  # success flag
    except Exception:
        file = open(savedir + "\\description.txt", "w")
        file.write("Oops! Mr. Taurus could not find the word in encyclopedia ! #&#  ")
        file.close()
def setDictionary(word):
    try:
        api_key = '750c98f0-f83f-4604-a78d-9065f53e5804'
        url = ("https://www.dictionaryapi.com/api/v3/references/sd2/json/"
               + word.lower() + "?key=" + api_key)
        resp = reqGet(url)
        data = jloads(resp.text)
        defis = data[0]['def'][0]['sseq']
        count = 0
        file = open("D:\\tsaurus\\Tsaurus-teaching-assistant-for-Kids-using-OCR-and-webscrapping\\temp"
                    + "\\dictionary.txt", "w")
        # Write up to three definitions to both the text file and the db.
        for defi in defis:
            count += 1
            if count >= 4:
                break
            text = defi[0][1]['dt'][0]
            text = text[1][4:] + "."
            text = text.replace("{it}", "\"")
            text = text.replace("{/it}", "\"")
            text = text.replace("{bc}", "")
            text = text.replace("{sx", "")
            text = text.replace("|}", "")
            text = text.replace("}", "|")
            file.write("=> " + text + " #&#  ")
            db.set_dic_text("=>" + text + " #&# ")
        file.write("< " + data[0]['fl'] + " > #&#  ")
        db.set_dic_partOfSpeech("< " + data[0]['fl'] + " > #&#  ")
        file.close()
        global c
        c = '+'  # success flag
    except Exception:
        pass
# A-Z word list URL acquisition
from requests import get as reqGet
from bs4 import BeautifulSoup as soup

headers = {'Authorization': 'Basic ZW5nbGlzaHByb2ZpbGU6dm9jYWJ1bGFyeQ=='}
wordList = 'http://vocabulary.englishprofile.org/dictionary//word-list/uk/c2/A'
responWL = reqGet(wordList, headers=headers)
page_html_WL = responWL.content
# parse the html
page_soup_WL = soup(page_html_WL, 'html.parser')
# get the urls in tags such as: <span><a href="/dictionary/word-list/uk/c2/A" title="A">A</a></span>
urlA_Z = page_soup_WL.find('div', {'id': 'letters'})
urlA_Zurl = urlA_Z.find_all('a')
print(urlA_Zurl)
UrlSet = list()
for aLineEle in urlA_Zurl:
    UrlEle = aLineEle['href']
    UrlSet.append(UrlEle)
print(UrlSet)
# write the urls into a file
import pickle
pkF = open(r'C:\myfirstgrab\picUrlSet.pk', 'wb')
pickle.dump(UrlSet, pkF)
pkF.close()
# good for now
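# To consume the pickled URL list later, a short read-back sketch
# (same path as the dump above):
import pickle

with open(r'C:\myfirstgrab\picUrlSet.pk', 'rb') as pkF:
    loadedUrls = pickle.load(pkF)
print(loadedUrls)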



#urls of ranges of initial letters 
Example #14
    def setup(self):
        self.bsObj = BeautifulSoup(reqGet(self.url).text, 'html.parser')
        self.bsTags = self.bsObj.find_all()
Example #15
    def addSiteToLookAfter(self, site):
        self.sitesToWatch[site] = BeautifulSoup(
            reqGet(site).text, 'html.parser')
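# Example #15 assumes a sitesToWatch dict on the instance; a hypothetical
# host class (only sitesToWatch is implied by the method body):
class MultiSiteWatcher:
    def __init__(self):
        self.sitesToWatch = {}   # site URL -> last parsed BeautifulSoup
    # addSiteToLookAfter(self, site) from Example #15 would be defined here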